twoafternoon.trtlive.DEGs.all.v3.0anno<-read_csv(file=file.path("..","output","twoafternoon.trtlive.DEGs.all.v3.0anno.csv"))
## Parsed with column specification:
## cols(
## genes = col_character(),
## logFC = col_double(),
## logCPM = col_double(),
## LR = col_double(),
## PValue = col_double(),
## FDR = col_double(),
## AGI = col_character(),
## At_symbol = col_character(),
## At_short_description = col_character(),
## perc_ID = col_double()
## )
twoafternoon.any.trtlive.DEGs.all.v3.0anno<-read_csv(file=file.path("..","output","twoafternoon.any.trtlive.DEGs.all.v3.0anno.csv"))
## Parsed with column specification:
## cols(
## genes = col_character(),
## logFC.soil_trtSBC_OLD = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day03 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day04 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day06 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day08 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day10 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day13 = col_double(),
## logFC.soil_trtSBC_OLD.sampling_day14 = col_double(),
## logCPM = col_double(),
## LR = col_double(),
## PValue = col_double(),
## FDR = col_double(),
## AGI = col_character(),
## At_symbol = col_character(),
## At_short_description = col_character(),
## perc_ID = col_double()
## )
# select genes with higher CV
## classic way
co.var.df <- function(x) ( 100*apply(x,1,sd)/rowMeans(x) )
cpm.timecourse.v3.0$cv<-co.var.df(cpm.timecourse.v3.0[,-1])
# tidyverse way (no working)
#cpm.timecourse.v3.0 %>% slice(1:100) %>% select(-1) %>% group_by(%>% mutate(cv=map(.,co.var.df ))
a<-hist(cpm.timecourse.v3.0$cv)
a
## $breaks
## [1] 0 50 100 150 200 250 300 350 400 450 500 550 600 650 700
##
## $counts
## [1] 19889 5976 989 270 87 45 24 12 7 4 2 1
## [13] 1 1
##
## $density
## [1] 1.456643e-02 4.376739e-03 7.243299e-04 1.977443e-04 6.371759e-05
## [6] 3.295738e-05 1.757727e-05 8.788633e-06 5.126703e-06 2.929544e-06
## [11] 1.464772e-06 7.323861e-07 7.323861e-07 7.323861e-07
##
## $mids
## [1] 25 75 125 175 225 275 325 375 425 475 525 575 625 675
##
## $xname
## [1] "cpm.timecourse.v3.0$cv"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# there are genes with extream value
cpm.timecourse.v3.0 %>% filter(cv>600)
# Check expression pattern
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes = cpm.timecourse.v3.0 %>% dplyr::filter(cv>450) %>% dplyr::slice(1:20)) ->p
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found
p
## Error in eval(expr, envir, enclos): object 'p' not found
ggsave(filename="../output/highCV.absvalue.genes.expression.png",width=11,height=8) # should I remove them????
#
sum(as.integer(cpm.timecourse.v3.0$cv>30))/dim(cpm.timecourse.v3.0)[1] # [1] 0.5207265
## [1] 0.5207265
sum(as.integer(cpm.timecourse.v3.0$cv>40))/dim(cpm.timecourse.v3.0)[1] # [1] 0.3725282. Larger CV than SAS timecourse data ()??? Due to non log absolute expression value.
## [1] 0.3725282
# cf. sum(as.integer(SAS.expression.vst.s.kazu$cv>4.5))/dim(SAS.expression.vst.s.kazu)[1] #[1] 0.2300789
cpm.timecourse.v3.0.log$cv<-co.var.df(cpm.timecourse.v3.0.log[,-1])
b<-hist(cpm.timecourse.v3.0.log$cv)
b
## $breaks
## [1] -300000 -280000 -260000 -240000 -220000 -200000 -180000 -160000 -140000
## [10] -120000 -100000 -80000 -60000 -40000 -20000 0 20000 40000
## [19] 60000 80000 100000 120000 140000
##
## $counts
## [1] 1 0 0 1 1 0 0 0 1 0 1 1
## [13] 1 7 1648 25634 7 1 1 2 0 1
##
## $density
## [1] 1.830965e-09 0.000000e+00 0.000000e+00 1.830965e-09 1.830965e-09
## [6] 0.000000e+00 0.000000e+00 0.000000e+00 1.830965e-09 0.000000e+00
## [11] 1.830965e-09 1.830965e-09 1.830965e-09 1.281676e-08 3.017431e-06
## [16] 4.693496e-05 1.281676e-08 1.830965e-09 1.830965e-09 3.661931e-09
## [21] 0.000000e+00 1.830965e-09
##
## $mids
## [1] -290000 -270000 -250000 -230000 -210000 -190000 -170000 -150000 -130000
## [10] -110000 -90000 -70000 -50000 -30000 -10000 10000 30000 50000
## [19] 70000 90000 110000 130000
##
## $xname
## [1] "cpm.timecourse.v3.0.log$cv"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
# use largeCV
cpm.timecourse.v3.0.log.largeCV<-cpm.timecourse.v3.0.log[cpm.timecourse.v3.0[cpm.timecourse.v3.0$cv>40,"transcript_ID"],]
dim(cpm.timecourse.v3.0.log.largeCV) # [1] 17262 289 > [1] 10173 290 (02/01/2020) (cf. SAS.expression.vst.s.kazu.largeCV is 7025 288)
## [1] 10173 290
c<-hist(cpm.timecourse.v3.0.log.largeCV$cv)
c
## $breaks
## [1] -30000 -25000 -20000 -15000 -10000 -5000 0 5000 10000 15000
## [11] 20000 25000 30000 35000
##
## $counts
## [1] 1 0 3 2 6 348 5692 6 4 1 1 0 1
##
## $density
## [1] 3.297609e-08 0.000000e+00 9.892828e-08 6.595218e-08 1.978566e-07
## [6] 1.147568e-05 1.876999e-04 1.978566e-07 1.319044e-07 3.297609e-08
## [11] 3.297609e-08 0.000000e+00 3.297609e-08
##
## $mids
## [1] -27500 -22500 -17500 -12500 -7500 -2500 2500 7500 12500 17500
## [11] 22500 27500 32500
##
## $xname
## [1] "cpm.timecourse.v3.0.log.largeCV$cv"
##
## $equidist
## [1] TRUE
##
## attr(,"class")
## [1] "histogram"
###########
#save(cpm.timecourse.v3.0.log.largeCV,file=file.path("..","output","cpm.timecourse.v3.0.log.largeCV.Rdata"))
write_csv(cpm.timecourse.v3.0.log.largeCV,path=file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"))
# The following setting is important, do not omit.
library(WGCNA) # errors in installing WGCNA on my computer at impute package installation (Jan 27, 2020). Use Whitney
options(stringsAsFactors = FALSE)
if(Sys.info()["nodename"]=="whitney") {
enableWGCNAThreads(10) # in Whitney (Maloof lab server)
} else if (Sys.info()["nodename"]=="Kazu-MBP.plb.ucdavis.edu") {
enableWGCNAThreads(2) # in my computer
}
#cpm.timecourse.v3.0.log.largeCV<-read_csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"))
# for some reasons in Whitney library columns were read ad character. Needs to fix it.
#cpm.timecourse.v3.0.log.largeCV<-read_csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"),
# col_types=list(col_character(),col_double())) # error
cpm.timecourse.v3.0.log.largeCV<-read.csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz")) # using classic read.csv in Whitney
#load(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.Rdata"))
#
datExpr <-t(cpm.timecourse.v3.0.log.largeCV[,-1])
# Choose a set of soft-thresholding powers
powers = c(c(1:9), seq(from = 2, to=20, by=10))
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
# Plot the results:
#sizeGrWindow(9, 5)
pdf("../output/largeCV.softthresholding.pdf",width=10,height=8)
par(mfrow = c(1,2));
cex1 = 0.9;
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
dev.off()
#
net = blockwiseModules(datExpr, power = 9,
TOMType = "unsigned", minModuleSize = 20,
reassignThreshold = 0, mergeCutHeight = 0.25,
numericLabels = TRUE, pamRespectsDendro = FALSE,
saveTOMs = TRUE,
saveTOMFileBase = "cpm.timecourse.v3.0.log.largeCV.TOM",
verbose = 3)
save(net,file="../output/net.cpm.timecourse.v3.0.log.largeCV.Rdata")
# open a graphics window
pdf(file="../output/largeCV.dendrogram.pdf",width=10,height=8)
# Convert labels to colors for plotting
mergedColors = labels2colors(net$colors)
# Plot the dendrogram and the module colors underneath
plotDendroAndColors(net$dendrograms[[1]], mergedColors[net$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
dev.off()
# save parameters
moduleLabels = net$colors
moduleColors = labels2colors(net$colors)
MEs = net$MEs
geneTree = net$dendrograms[[1]]
save(MEs, moduleLabels, moduleColors, geneTree,file ="../output/all.largeCV.RData")
cpm.timecourse.v3.0.log.largeCV<-read.csv(file.path("..","output","cpm.timecourse.v3.0.log.largeCV.csv.gz"))
dim(cpm.timecourse.v3.0.log.largeCV) # [1] 17262 289 -> [1] 10173 290 (Feb 01, 2020)
## [1] 10173 290
load("../output/net.cpm.timecourse.v3.0.log.largeCV.Rdata")
load("../output/all.largeCV.RData")
# how many modules?
table(net$colors);length(table(net$colors)) # 7 modules
##
## 0 1 2 3 4 5 6
## 4968 4723 174 126 79 72 31
## [1] 7
cpm.timecourse.v3.0.log.largeCV.modules <- tibble(
transcript_ID=cpm.timecourse.v3.0.log.largeCV$transcript_ID,
modules=moduleColors
)
#cpm.timecourse.v3.0.log.largeCV.modules.list<-list(transcript_ID=cpm.timecourse.v3.0.log.largeCV$transcript_ID,modules=moduleColors)
## prep
# annotation file for v3.0annotation
Br.v3.0.At.BLAST <- read_csv(file.path("..","Annotation_copy","output","v3.0annotation","Brapa_v3.0_annotated.csv"))
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_double(),
## name = col_character(),
## chrom = col_character(),
## subject = col_character(),
## AGI = col_character(),
## At_symbol = col_character(),
## At_full_name = col_character(),
## At_gene_model_type = col_character(),
## At_short_description = col_character(),
## At_Curator_summary = col_character(),
## At_Computational_description = col_character()
## )
## See spec(...) for full column specifications.
# This annotation is redundant with name (Br grene). Eg
Br.v3.0.At.BLAST %>% filter(name=="BraA01g040570.3C")
# reduce the redundancy (112418)
Br.v3.0anno.At.BLAST.highscore <- Br.v3.0.At.BLAST %>% group_by(name) %>% arrange(desc(score)) %>% dplyr::slice(1)
# function for adding annotation
## get object name https://stackoverflow.com/questions/14577412/how-to-convert-variable-object-name-into-string
myfunc <- function(v1) {
deparse(substitute(v1))
}
myfunc(foo)
## [1] "foo"
# adding annotation and write_csv adding ".v3.0anno.csv" to the object name.
addAnno<-function(DGE) {temp<-left_join(DGE %>% rownames_to_column(var="genes"),Br.v3.0anno.At.BLAST.highscore,by=c(genes="name")) %>% dplyr::select(genes,names(DGE),AGI, At_symbol, At_short_description, perc_ID); print(deparse(substitute(DGE)));
write_csv(temp, path=file.path("..","output",paste(deparse(substitute(DGE)),".v3.0anno.csv",sep="")));
return(temp)}
#Br.v3.0anno.At.BLAST.highscore.list<-list()
Bra.v3.0_cdna.list<-list()
#names(Bra.v3.0_cdna.list)<-names(Bra.v3.0_cdna)
names(Bra.v3.0_cdna) %in% cpm.timecourse.v3.0.log.largeCV.modules$transcript_ID
for(i in 1:length(Bra.v3.0_cdna)) {
print(paste("i is ",i))
print(cpm.timecourse.v3.0.log.largeCV.modules %>% dplyr::filter(transcript_ID==as_vector(names(Bra.v3.0_cdna))[i]) %>% dplyr::select(transcript_ID))
print(cpm.timecourse.v3.0.log.largeCV.modules %>% dplyr::filter(transcript_ID==as_vector(names(Bra.v3.0_cdna))[i]) %>% dplyr::select(transcript_ID) %>% dim())
print(cpm.timecourse.v3.0.log.largeCV.modules %>% dplyr::filter(transcript_ID==as_vector(names(Bra.v3.0_cdna))[i]) %>% dplyr::select(transcript_ID) %>% dim() ==c(1,1))
temp<-cpm.timecourse.v3.0.log.largeCV.modules %>% dplyr::filter(transcript_ID==names(Bra.v3.0_cdna)[i]) %>% dplyr::select(transcript_ID)
print(dim(temp)[1]==0)
if(dim(temp)[1]==0) next else
#Bra.v3.0_cdna.list[[i]]<-cpm.timecourse.v3.0.log.largeCV.modules[names(Bra.v3.0_cdna)[i],"modules"]
# input module
Bra.v3.0_cdna.list[[i]]<-cpm.timecourse.v3.0.log.largeCV.modules %>% filter(transcript_ID==names(Bra.v3.0_cdna)[i]) %>% dplyr::select(modules) %>% as_vector()
# iput gene name
names(Bra.v3.0_cdna.list)[[i]]<-names(Bra.v3.0_cdna)[i]
}
# clean up Brgo.v3.0_cdna.list
table(sapply(Bra.v3.0_cdna.list,is.null))
Bra.v3.0_cdna.list<-Bra.v3.0_cdna.list[!sapply(Bra.v3.0_cdna.list,is.null)]
table(sapply(Bra.v3.0_cdna.list,is.null))
save(Bra.v3.0_cdna.list,file="../output/Bra.v3.0_cdna.list.Rdata")
######### Did not work
# cpm.timecourse.v3.0.log.largeCV.modules %>% nest(transcript_ID) # this is not what I want
# library(purrr)
#cpm.timecourse.v3.0.log.largeCV.modules %>% purrr::transpose()
# loading module info as custom categories compatible with goseq()
load("../output/Bra.v3.0_cdna.list.Rdata")
# GOseq
library(ShortRead);library(goseq);library(GO.db);library("annotate")
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following object is masked from 'package:limma':
##
## plotMA
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: BiocParallel
## Loading required package: Biostrings
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
## Loading required package: XVector
##
## Attaching package: 'XVector'
## The following object is masked from 'package:purrr':
##
## compact
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
## Loading required package: Rsamtools
## Loading required package: GenomeInfoDb
## Loading required package: GenomicRanges
## Loading required package: GenomicAlignments
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: DelayedArray
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
##
## colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
##
## simplify
## The following objects are masked from 'package:base':
##
## aperm, apply, rowsum
##
## Attaching package: 'GenomicAlignments'
## The following object is masked from 'package:dplyr':
##
## last
##
## Attaching package: 'ShortRead'
## The following object is masked from 'package:dplyr':
##
## id
## The following object is masked from 'package:purrr':
##
## compose
## The following object is masked from 'package:tibble':
##
## view
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
##
## Attaching package: 'geneLenDataBase'
## The following object is masked from 'package:S4Vectors':
##
## unfactor
##
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: XML
# for ggplot heatmap
## uncompress gz file
system(paste("gunzip -c ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.gz")," > ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")))
## read cDNA fasta file
Bra.v3.0_cdna<-readDNAStringSet(file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")) # copied from /Volumes/data_work/Data8/NGS_related/Brassica_rapa_Upendra/G3
Bra.v3.0_cdna
## A DNAStringSet instance of length 46250
## width seq names
## [1] 1254 ATGCGACCACCGGGTGTTGTT...GAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
## [2] 1668 ATGCCAGCAATGCATGCCGTT...AGATGGATCACAAAAGATTAA BraA01g000020.3C
## [3] 957 ATGATGCTTCTCGTTCATACC...AACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
## [4] 1299 ATGAGTCGTCTTCTCCTTGCT...GGGTCACGAGATGAGCTATAA BraA01g000040.3C
## [5] 774 ATGGATTCTGGGCTTCAGCAT...GGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
## ... ... ...
## [46246] 162 ATGCGTCCGTCCTCAGCTCCC...TCTTTGGTGGTCCGGTTCTAA BraAnng001840.3C
## [46247] 1455 ATGTCTAATCAAGGATCAGGA...ACAGGTTTGTTTAGGTGCTAA BraAnng001850.3C
## [46248] 1011 ATGGACAACGTAATTCTGAAA...TCAGGGAAGAAAAGCCCCTGA BraAnng006150.3C
## [46249] 870 ATGTTTCCAAGACGTACAAGG...AGCAGTTGTCCTTATAGTTAG BraAnng000040.3C
## [46250] 1338 ATGCCGCAACAATACTGGAAC...GGAGAGAACCTTATCTCCTGA BraAnng003440.3C
## remove fasta file
system(paste("rm ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa"),sep=""))
# special funciton for GOseq
GOseq.customcategory.ORA<-function(genelist,padjust=0.05,custom.category.list=Bra.v3.0_cdna.list,Br_cdna=Bra.v3.0_cdna) { # return GO enrichment table, padjus, padjust=0.05.
bias<-nchar(Br_cdna)
names(bias)<-names(Br_cdna)
TF<-(names(bias) %in% genelist)*1
names(TF)<-names(bias)
#print(TF)
pwf<-nullp(TF,bias.data=bias)
#print(pwf$DEgenes)
GO.pval <- goseq(pwf,gene2cat=custom.category.list,use_genes_without_cat=TRUE) # format became different in new goseq version (021111). Does not work (042716)
#GO.pval <- goseq(pwf,gene2cat=Brgo.DF3,use_genes_without_cat=TRUE) # format became different in new goseq version (021111)
GO.pval$over_represented_padjust<-p.adjust(GO.pval$over_represented_pvalue,method="BH")
if(GO.pval$over_represented_padjust[1]>padjust) return("no enriched GO")
else {
enriched.GO<-GO.pval[GO.pval$over_represented_padjust<padjust,]
print("enriched.GO is")
print(enriched.GO)
return(enriched.GO)
}
}
gene.up<-twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(logFC>0&FDR<0.05) %>% dplyr::select(genes) %>% as_vector()
gene.down<-twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(logFC<0&FDR<0.05) %>% dplyr::select(genes) %>% as_vector()
enriched.GO.up<-GOseq.customcategory.ORA(genelist=gene.up) # needs to wait for Bra.v3.0_cdna.list.Rdata ready in Whitney
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 13 lightyellow 5.053465e-08 1.0000000 7
## 21 tan 1.651936e-04 0.9999827 6
## numInCat over_represented_padjust
## 13 32 1.162297e-06
## 21 73 1.899727e-03
enriched.GO.down<-GOseq.customcategory.ORA(genelist=gene.down)
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 23 yellow 4.518946e-176 1.0000000 149
## 16 pink 2.553492e-32 1.0000000 40
## 12 lightgreen 1.895763e-05 0.9999984 7
## 17 purple 1.741131e-03 0.9995694 9
## numInCat over_represented_padjust
## 23 242 1.039358e-174
## 16 128 2.936516e-31
## 12 33 1.453418e-04
## 17 101 1.001150e-02
n<-1
gene.up.category<-cpm.timecourse.v3.0.log.largeCV.modules %>% filter(transcript_ID %in% gene.up,modules==enriched.GO.up$category[n])
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes=gene.up.category[1:10,]) # works
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found
# scaling expression data
cpm.timecourse.v3.0.scale<-t(scale(t(cpm.timecourse.v3.0[,-1]))) %>% as_tibble() %>% bind_cols(data.frame(transcript_ID=cpm.timecourse.v3.0$transcript_ID[]),.)
gene.up.category<-cpm.timecourse.v3.0.log.largeCV.modules %>% filter(transcript_ID %in% gene.up,modules==enriched.GO.up$category[n])
gene.down.category<-cpm.timecourse.v3.0.log.largeCV.modules %>% filter(transcript_ID %in% gene.down,modules==enriched.GO.up$category[n])
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(data=cpm.timecourse.v3.0.scale,target.genes=gene.up.category[1,])
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found
input<-tribble(
~target.genes,~data,~f,
gene.up.category[1:10,],cpm.timecourse.v3.0.scale,expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,
gene.down.category[1:10,],cpm.timecourse.v3.0.scale,expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2
)
input2<-tribble(
~f,~param,
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,list(target.genes=gene.up.category[1:10,],data=cpm.timecourse.v3.0.scale),
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2,list(target.genes=gene.down.category[1:10,],data=cpm.timecourse.v3.0.scale)
)
test<-input2 %>% mutate(output=invoke_map(f,param)) # works, but parameters are not visible
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found
# how about to use map2?
## an example
params<-tribble(
~mean,~sd,~n,
5,1,1,
10,5,3,
-3,10,5
)
params %>% pmap(rnorm)
## [[1]]
## [1] 6.707234
##
## [[2]]
## [1] 10.660708 15.642700 3.259823
##
## [[3]]
## [1] -8.105668 -14.774122 5.708235 22.881898 10.536788
#
input3<-tribble(
~target.genes,~data,~title,
gene.up.category[1:10,],cpm.timecourse.v3.0.scale,"2-afternoon soil up",
gene.down.category[1:10,],cpm.timecourse.v3.0.scale,"2-afternoon soil down",
)
#input3 %>% pmap(expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2) -> expression.pattern
#
expression.pattern <- input3 %>% mutate(plot=pmap(.,expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2))
## Error in transcript_ID %in% target.genes$transcript_ID: object 'target.genes' not found
expression.pattern$plot[1] # plot
## Error in eval(expr, envir, enclos): object 'expression.pattern' not found
#input3 %>% mutate(plot=invoke_map(~expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2)) # errors
temp.abs<-cpm.timecourse.v3.0.log %>% head() %>%
gather(sample,value,-transcript_ID) %>%
mutate(abs.value=2^value) %>%
inner_join(sample.description.timecourse, by="sample") %>%
split(.$soil_trt)
mean.by.soil<-function(x) {group_by(x, group,transcript_ID) %>% summarize(mean=mean(abs.value))}
temp.abs.mean<-temp.abs %>% map(.,mean.by.soil)
cpm.timecourse.v3.0.logFC<-tibble(transcript_ID=temp.abs.mean[["SBC_OLD"]]$transcript_ID,group=temp.abs.mean[["SBC_OLD"]]$group,logFC=log(temp.abs.mean[["SBC_OLD"]]$mean/temp.abs.mean[["ATM_BLANK"]]$mean)) %>% left_join(sample.description.timecourse %>% dplyr::select("group","sampling_day","sampling_time"),by="group")
# check logFC range
range(cpm.timecourse.v3.0.logFC$logFC) #(Jan 31, 2020)
temp.abs<-cpm.timecourse.v3.0.log %>%
gather(sample,value,-transcript_ID) %>% mutate(abs.value=2^value) %>%
inner_join(sample.description.timecourse, by="sample") %>%
split(.$soil_trt)
# mean of absolute value funciton
mean.by.soil<-function(x) {group_by(x, group,transcript_ID) %>% summarize(mean=mean(abs.value))}
# calculating absolute value mean
temp.abs.mean<-temp.abs %>% map(.,mean.by.soil)
# making summary tibble
temp2<-sample.description.timecourse %>% dplyr::select("group","sampling_day","sampling_time")
sample.description.timecourse.logFC<-temp2[!duplicated(temp2),]
# add sample info
cpm.timecourse.v3.0.logFC<-tibble(transcript_ID=temp.abs.mean[["SBC_OLD"]]$transcript_ID,group=temp.abs.mean[["SBC_OLD"]]$group,logFC=log(temp.abs.mean[["SBC_OLD"]]$mean/temp.abs.mean[["ATM_BLANK"]]$mean)) %>% left_join(sample.description.timecourse.logFC,by="group")
# check
dim(cpm.timecourse.v3.0.logFC)
# check frequency distribution
a<-hist(cpm.timecourse.v3.0.logFC$logFC) # most of them are small
a
# what are genes with super high logFC?
high.FC.genes<-cpm.timecourse.v3.0.logFC %>% filter(abs(logFC)>5) %>% dplyr::select(transcript_ID)
expression.pattern.Br.graph.timecourse.v3.0annotation.cpm.2(target.genes = high.FC.genes[1:10,])
#
addAnno2<-function(DGE) {temp<-left_join(DGE,Br.v3.0anno.At.BLAST.highscore,by=c("transcript_ID"="name")) %>% dplyr::select(transcript_ID,names(DGE),AGI, At_symbol, At_short_description, perc_ID); print(deparse(substitute(DGE)));
write_csv(temp, path=file.path("..","output",paste(deparse(substitute(DGE)),".v3.0anno.csv",sep="")));
return(temp)}
#
addAnno2(high.FC.genes)
#
dim(cpm.timecourse.v3.0.logFC) #[1] 1110000 5
#write_csv(cpm.timecourse.v3.0.logFC,path="../output/cpm.timecourse.v3.0.logFC.csv") # too large (306 M)
write_csv(cpm.timecourse.v3.0.logFC,path="../output/cpm.timecourse.v3.0.logFC.csv.gz") # 12.3 M
cpm.timecourse.v3.0.logFC <-read_csv("../output/cpm.timecourse.v3.0.logFC.csv.gz")
## Parsed with column specification:
## cols(
## transcript_ID = col_character(),
## group = col_character(),
## logFC = col_double(),
## sampling_day = col_character(),
## sampling_time = col_character()
## )
target.genes<-gene.up
# expression.pattern.Br.graph.timecourse.v3.0annotation.logFC<-function(data=cpm.timecourse.v3.0.logFC,target.genes,title="",subset.data="only_two_afternoon"){
# #print(paste("data is",data[1:10,]))
# #print(paste("tissue.type is root"))
# data[is.na(data)] <- 0 #
# data.temp<-data %>% dplyr::filter(transcript_ID %in% target.genes)
#
# # if (2-afternoon=TRUE)
# if (subset.data=="only_two_afternoon") {
# p<-data.temp %>% ggplot(aes(x=sampling_day,y=logFC)) +
# geom_boxplot(alpha = 0.5) +
# theme_bw() +
# theme(strip.text.y=element_text(angle=0),axis.text.x=element_text(angle=90)) +
# theme(legend.position="bottom") + labs(title=title)
# p
# } else {print("Define subset.data other than only_two_afternoon.")}
# }
# test the function
expression.pattern.Br.graph.timecourse.v3.0annotation.logFC(target.genes=gene.up,subset.data="only_two_afternoon")
# 2_afternoon DEG expression data (scaled)
cpm.timecourse.v3.0.scale.twoafternoon.DEG<-cpm.timecourse.v3.0.scale %>% dplyr::select(-cv) %>%
inner_join(twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(FDR<0.05) %>% dplyr::select(genes),by=c(transcript_ID="genes")) %>%
gather(sample,value,-1) %>% inner_join(sample.description.timecourse,by="sample") %>% filter(sampling_time=="2_afternoon")
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# spread
cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread<-cpm.timecourse.v3.0.scale.twoafternoon.DEG %>% dplyr::select(transcript_ID,sample,value) %>% spread(sample,value,-1)
dim(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread) # [1] 1442 97
## [1] 1442 97
# calculate wss
wss <- (nrow(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1])-1)*sum(apply(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1],2,var))
for (i in 2:20) wss[i] <- sum(kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1],
centers=i,iter.max = 10)$withinss) # If default iter.max=10 gave me "did not converge in 10 iterations" error. Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
plot(1:20, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")
Let’s perform the actual clsutering using K=8:
set.seed(20)
kClust.8 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], centers=8, nstart = 1000, iter.max = 20)
kClusters.8 <- kClust.8$cluster
# number of clusters
cluster.8.num<-tibble(cluster=kClusters.8) %>% group_by(cluster) %>% summarize(n=sum(cluster))
cluster.8.num$cluster<-as.character(cluster.8.num$cluster) # classic way
Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i
clust.centroid = function(i, dat, clusters) {
ind = (clusters == i)
colMeans(dat[ind,])
}
kClustcentroids.8 <- sapply(levels(factor(kClusters.8)), clust.centroid, cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], kClusters.8)
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:SummarizedExperiment':
##
## trim
## The following object is masked from 'package:GenomicRanges':
##
## trim
## The following object is masked from 'package:Biostrings':
##
## collapse
## The following objects are masked from 'package:IRanges':
##
## collapse, trim
## The following object is masked from 'package:dplyr':
##
## collapse
# adding sample description to data
data.sample<-kClustcentroids.8 %>% as_tibble(rownames="sample") %>%
gather(cluster,value,-1) %>%
inner_join(sample.description.timecourse,by="sample") %>%
inner_join(cluster.8.num,by="cluster") %>%
mutate(cluster.n=glue('{cluster2} \n({n2})',
cluster2=cluster,
n2=n) )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean
data.group<-data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% group_by(group.cluster) %>% summarize(group.cluster.mean=mean(value)) %>% inner_join(
data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% dplyr::select("group.cluster","sampling_day","soil_trt","cluster.n","cluster"),by="group.cluster") %>% dplyr::slice(rep(1:768)[!duplicated(.$group.cluster)])
# plot
p8<- ggplot(data.sample,aes(x=soil_trt,y=value, group=cluster, colour=as.factor(cluster))) +
geom_point() + geom_hline(yintercept=0,color="red") +
geom_line(data=data.group,aes(x=soil_trt,y=group.cluster.mean)) +
facet_grid(cluster.n~sampling_day) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters",color = "Cluster",y="scaled expression level")
p8
ggsave(p8,file="../output/Twoafternoon.DEG.Kmean.8clusters.png",width=11,height=8)
set.seed(20)
kClust.5 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], centers=5, nstart = 1000, iter.max = 20)
kClusters.5 <- kClust.5$cluster
# number of clusters
cluster.5.num<-tibble(cluster=kClusters.5) %>% group_by(cluster) %>% summarize(n=sum(cluster))
cluster.5.num$cluster<-as.character(cluster.5.num$cluster) # classic way
kClustcentroids.5 <- sapply(levels(factor(kClusters.5)), clust.centroid, cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread[,-1], kClusters.5)
# adding sample description to data
data.sample<-kClustcentroids.5 %>% as_tibble(rownames="sample") %>%
gather(cluster,value,-1) %>%
inner_join(sample.description.timecourse,by="sample") %>%
inner_join(cluster.5.num,by="cluster") %>%
mutate(cluster.n=glue('{cluster2} \n({n2})',
cluster2=cluster,
n2=n) )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean
data.group<-data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% group_by(group.cluster) %>% summarize(group.cluster.mean=mean(value)) %>% inner_join(
data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% dplyr::select("group.cluster","sampling_day","soil_trt","cluster.n","cluster"),by="group.cluster") %>% dplyr::slice(rep(1:480)[!duplicated(.$group.cluster)]) # only cluster 1... why???
# plot
p5<- ggplot(data.sample,aes(x=soil_trt,y=value, group=cluster, colour=as.factor(cluster))) +
geom_point() + geom_hline(yintercept=0,color="red") +
geom_line(data=data.group,aes(x=soil_trt,y=group.cluster.mean)) +
facet_grid(cluster.n~sampling_day) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters",color = "Cluster",y="scaled expression level")
p5
ggsave(p5,file="../output/Twoafternoon.DEG.Kmean.5clusters.png",width=11,height=8)
expression.pattern.Br.graph.timecourse.v3.0annotation.logFC(target.genes=gene.up,subset.data="only_two_afternoon")
# 2_afternoon DEG expression data (scaled)
cpm.timecourse.v3.0.logFC.twoafternoon.DEG<-cpm.timecourse.v3.0.logFC %>%
inner_join(twoafternoon.trtlive.DEGs.all.v3.0anno %>% filter(FDR<0.05) %>% dplyr::select(genes),by=c(transcript_ID="genes")) %>% filter(sampling_time=="2_afternoon")
# spread
cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread<-cpm.timecourse.v3.0.logFC.twoafternoon.DEG %>% dplyr::select(transcript_ID,group,logFC) %>% spread(group,logFC,-1)
dim(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread) # [1] 1474 97
## [1] 1442 9
# calculate wss
wss.logFC <- (nrow(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1])-1)*sum(apply(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1],2,var))
for (i in 2:20) wss.logFC[i] <- sum(kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1],
centers=i,iter.max = 10)$withinss,iter.max=20) # If default iter.max=10 gave me "did not converge in 10 iterations" error. Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
## Warning: did not converge in 10 iterations
plot(1:20, wss.logFC, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")
# Let’s perform the actual clsutering using K=5:
set.seed(20)
kClust.logFC.5 <- kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], centers=5, nstart = 1000, iter.max = 20)
kClusters.logFC.5 <- kClust.logFC.5$cluster
# number of clusters
cluster.5.num<-tibble(cluster=kClusters.logFC.5) %>% group_by(cluster) %>% summarize(n=sum(cluster))
cluster.5.num$cluster<-as.character(cluster.5.num$cluster) # classic way
kClustcentroids.logFC.5 <- sapply(levels(factor(kClusters.logFC.5)), clust.centroid, cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], kClusters.logFC.5)
# making sample.description.timecourse.logFC
temp2<-sample.description.timecourse %>% dplyr::select("group","sampling_day","sampling_time")
sample.description.timecourse.logFC<-temp2[!duplicated(temp2),]
# plot
p.logFC.5<-kClustcentroids.logFC.5 %>% as_tibble(rownames="group") %>%
gather(cluster,value,-1) %>%
inner_join(sample.description.timecourse.logFC,by="group") %>%
inner_join(cluster.5.num,by="cluster") %>%
mutate(cluster.n=glue('{cluster2} \n({n2})',
cluster2=cluster,
n2=n) ) %>%
ggplot(aes(x=sampling_day,y=value, group=cluster, colour=as.factor(cluster))) +
geom_point() + geom_hline(yintercept=0,color="red") +
facet_grid(cluster.n~.) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): five clusters",color = "Cluster",y="scaled expression level")
## Warning: Column `group` joining character vector and factor, coercing into
## character vector
p.logFC.5
ggsave(p.logFC.5,file="../output/Twoafternoon.DEG.logFC.Kmean.5clusters.png",width=11,height=8)
set.seed(20)
kClust.logFC.8 <- kmeans(cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], centers=8, nstart = 1000, iter.max = 20)
kClusters.logFC.8 <- kClust.logFC.8$cluster
# number of clusters
cluster.8.num<-tibble(cluster=kClusters.logFC.8) %>% group_by(cluster) %>% summarize(n=sum(cluster))
cluster.8.num$cluster<-as.character(cluster.8.num$cluster) # classic way
kClustcentroids.logFC.8 <- sapply(levels(factor(kClusters.logFC.8)), clust.centroid, cpm.timecourse.v3.0.logFC.twoafternoon.DEG.spread[,-1], kClusters.logFC.8)
p.logFC.8<-kClustcentroids.logFC.8 %>% as_tibble(rownames="group") %>%
gather(cluster,value,-1) %>%
inner_join(sample.description.timecourse.logFC,by="group") %>%
inner_join(cluster.8.num,by="cluster") %>%
mutate(cluster.n=glue('{cluster2} \n({n2})',
cluster2=cluster,
n2=n) ) %>%
ggplot(aes(x=sampling_day,y=value, group=cluster, colour=as.factor(cluster))) +
geom_point() + geom_hline(yintercept=0,color="red") +
facet_grid(cluster.n~.) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters",color = "Cluster",y="scaled expression level")
## Warning: Column `group` joining character vector and factor, coercing into
## character vector
p.logFC.8
ggsave(p.logFC.8,file="../output/Twoafternoon.DEG.logFC.Kmean.8clusters.png",width=11,height=8)
load(file.path("..","Annotation_copy","output","v3.0annotation","Brgo.v3.0anno.Atgoslim.BP.list.Rdata"))
# GOseq
library(ShortRead);library(goseq);library(GO.db);library("annotate")
# for ggplot heatmap
## uncompress gz file
system(paste("gunzip -c ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.gz")," > ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")))
## read cDNA fasta file
Bra.v3.0_cdna<-readDNAStringSet(file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa")) # copied from /Volumes/data_work/Data8/NGS_related/Brassica_rapa_Upendra/G3
Bra.v3.0_cdna
## A DNAStringSet instance of length 46250
## width seq names
## [1] 1254 ATGCGACCACCGGGTGTTGTT...GAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
## [2] 1668 ATGCCAGCAATGCATGCCGTT...AGATGGATCACAAAAGATTAA BraA01g000020.3C
## [3] 957 ATGATGCTTCTCGTTCATACC...AACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
## [4] 1299 ATGAGTCGTCTTCTCCTTGCT...GGGTCACGAGATGAGCTATAA BraA01g000040.3C
## [5] 774 ATGGATTCTGGGCTTCAGCAT...GGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
## ... ... ...
## [46246] 162 ATGCGTCCGTCCTCAGCTCCC...TCTTTGGTGGTCCGGTTCTAA BraAnng001840.3C
## [46247] 1455 ATGTCTAATCAAGGATCAGGA...ACAGGTTTGTTTAGGTGCTAA BraAnng001850.3C
## [46248] 1011 ATGGACAACGTAATTCTGAAA...TCAGGGAAGAAAAGCCCCTGA BraAnng006150.3C
## [46249] 870 ATGTTTCCAAGACGTACAAGG...AGCAGTTGTCCTTATAGTTAG BraAnng000040.3C
## [46250] 1338 ATGCCGCAACAATACTGGAAC...GGAGAGAACCTTATCTCCTGA BraAnng003440.3C
## remove fasta file
system(paste("rm ",file.path("..","Annotation_copy","input","v3.0annotation","Brapa_genome_v3.0_cds.fa"),sep=""))
# GOseq function
GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA<-function(genelist,padjust=0.05,ontology="BP",custom.category.list=Brgo.v3.0anno.Atgoslim.BP.list,Br_cdna=Bra.v3.0_cdna) { # return GO enrichment table, padjus, padjust=0.05.
bias<-nchar(Br_cdna)
names(bias)<-names(Br_cdna)
TF<-(names(bias) %in% genelist)*1
names(TF)<-names(bias)
#print(TF)
pwf<-nullp(TF,bias.data=bias)
#print(pwf$DEgenes)
GO.pval <- goseq(pwf,gene2cat=custom.category.list,use_genes_without_cat=TRUE) # format became different in new goseq version (021111). Does not work (042716)
#GO.pval <- goseq(pwf,gene2cat=Brgo.DF3,use_genes_without_cat=TRUE) # format became different in new goseq version (021111)
#head(GO.pval)
if(ontology=="BP") {
GO.pval2<-subset(GO.pval,ontology=="BP")
} else if(ontology=="CC") {
GO.pval2<-subset(GO.pval,ontology=="CC")
} else {
GO.pval2<-subset(GO.pval,ontology=="MF")
}
GO.pval2$over_represented_padjust<-p.adjust(GO.pval2$over_represented_pvalue,method="BH")
if(GO.pval2$over_represented_padjust[1]>padjust) return("no enriched GO")
else {
enriched.GO<-GO.pval2[GO.pval2$over_represented_padjust<padjust,]
print("enriched.GO is")
print(enriched.GO)
## write Term and Definition
for(i in 1:dim(enriched.GO)[1]) {
if(is.null(Term(GOTERM[enriched.GO[i,"category"]]))) {next} else {
enriched.GO$Term[i]<-Term(GOTERM[[enriched.GO[i,"category"]]])
enriched.GO$Definition[i]<-Definition(GOTERM[[enriched.GO[i,"category"]]])
}
}
return(enriched.GO)
}
}
#
head(Bra.v3.0_cdna)
## A DNAStringSet instance of length 6
## width seq names
## [1] 1254 ATGCGACCACCGGGTGTTGTTTC...CTGAGTCTCTCTTGCTCGCTTAA BraA01g000010.3C
## [2] 1668 ATGCCAGCAATGCATGCCGTTTT...GTAGATGGATCACAAAAGATTAA BraA01g000020.3C
## [3] 957 ATGATGCTTCTCGTTCATACCCG...GGAACTTGGAGTTCCCTGAGTGA BraA01g000030.3C
## [4] 1299 ATGAGTCGTCTTCTCCTTGCTCA...GTGGGTCACGAGATGAGCTATAA BraA01g000040.3C
## [5] 774 ATGGATTCTGGGCTTCAGCATCT...AAGGAAAGCAGTTCCTTTCGTGA BraA01g000050.3C
## [6] 3327 ATGGCGTCCACTCCTCCTCAAAA...GCGGTGGGTTTCAATTTCCTTGA BraA01g000060.3C
# length(bias) # 44239 > 45019 where the bias come from?
# bias.data vector must have the same length as DEgenes vector!
temp<-tibble(transcript_ID=cpm.timecourse.v3.0.scale.twoafternoon.DEG.spread$transcript_ID, cluster=kClusters.8) %>% split(.$cluster) %>% map(function(x) {GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist=x$transcript_ID)})
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742 1.732470e-08 1.0000000 18
## 423 GO:0006468 1.855801e-08 1.0000000 29
## 2750 GO:0050832 1.926103e-08 1.0000000 14
## 1858 GO:0031348 2.494123e-05 0.9999987 5
## 638 GO:0006952 3.373325e-05 0.9999904 18
## 890 GO:0009611 4.127141e-05 0.9999930 10
## 3442 GO:1900067 5.827127e-05 0.9999999 2
## 1000 GO:0009814 6.360706e-05 0.9999976 4
## 921 GO:0009651 6.434943e-05 0.9999832 15
## 3242 GO:0080119 1.300857e-04 0.9999973 3
## numInCat term ontology
## 2253 726 defense response to bacterium BP
## 423 1484 protein phosphorylation BP
## 2750 469 defense response to fungus BP
## 1858 64 negative regulation of defense response BP
## 638 1165 defense response BP
## 890 419 response to wounding BP
## 3442 3 regulation of cellular response to alkaline pH BP
## 1000 49 defense response, incompatible interaction BP
## 921 1045 response to salt stress BP
## 3242 17 ER body organization BP
## over_represented_padjust
## 2253 2.432668e-05
## 423 2.432668e-05
## 2750 2.432668e-05
## 1858 2.362558e-02
## 638 2.556305e-02
## 890 2.606289e-02
## 3442 2.709111e-02
## 1000 2.709111e-02
## 921 2.709111e-02
## 3242 4.928947e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 390 GO:0006412 3.80768e-07 1 11
## numInCat term ontology over_represented_padjust
## 390 715 translation BP 0.00144273
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345 3.409045e-11 1 7
## 2259 GO:0042761 1.726215e-08 1 5
## 1181 GO:0010143 1.931256e-06 1 4
## numInCat term ontology
## 1291 41 suberin biosynthetic process BP
## 2259 30 very long-chain fatty acid biosynthetic process BP
## 1181 29 cutin biosynthetic process BP
## over_represented_padjust
## 1291 1.291687e-07
## 2259 3.270314e-05
## 1181 2.439177e-03
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200 3.659965e-11 1.0000000 17
## 2253 GO:0042742 2.432098e-08 1.0000000 23
## 1174 GO:0010120 7.293180e-08 1.0000000 6
## 1166 GO:0010112 6.780014e-07 1.0000000 5
## 638 GO:0006952 8.277917e-07 0.9999998 27
## 899 GO:0009626 2.954187e-06 0.9999997 9
## 894 GO:0009617 3.846114e-06 0.9999994 11
## 890 GO:0009611 6.457569e-06 0.9999987 14
## 855 GO:0009409 1.517719e-05 0.9999961 17
## 944 GO:0009697 2.834944e-05 0.9999992 4
## 2750 GO:0050832 3.456585e-05 0.9999924 13
## 1205 GO:0010193 5.358930e-05 0.9999968 5
## 960 GO:0009737 7.818971e-05 0.9999759 18
## 187 GO:0002237 8.857740e-05 0.9999917 6
## 185 GO:0002229 1.515927e-04 0.9999802 7
## numInCat term ontology
## 1210 286 response to chitin BP
## 2253 726 defense response to bacterium BP
## 1174 27 camalexin biosynthetic process BP
## 1166 22 regulation of systemic acquired resistance BP
## 638 1165 defense response BP
## 899 140 plant-type hypersensitive response BP
## 894 241 response to bacterium BP
## 890 419 response to wounding BP
## 855 696 response to cold BP
## 944 21 salicylic acid biosynthetic process BP
## 2750 469 defense response to fungus BP
## 1205 54 response to ozone BP
## 960 832 response to abscisic acid BP
## 187 88 response to molecule of bacterial origin BP
## 185 126 defense response to oomycetes BP
## over_represented_padjust
## 1210 1.386761e-07
## 2253 4.607609e-05
## 1174 9.211286e-05
## 1166 6.273006e-04
## 638 6.273006e-04
## 899 1.865569e-03
## 894 2.081846e-03
## 890 3.058466e-03
## 855 6.389597e-03
## 944 1.074160e-02
## 2750 1.190636e-02
## 1205 1.692082e-02
## 960 2.278929e-02
## 187 2.397284e-02
## 185 3.829231e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 3164 GO:0071732 8.264952e-07 1.0000000 5
## 3114 GO:0071369 1.490655e-06 1.0000000 5
## 3094 GO:0071281 7.588090e-06 0.9999997 5
## 254 GO:0006096 9.647819e-06 0.9999994 6
## numInCat term ontology
## 3164 52 cellular response to nitric oxide BP
## 3114 62 cellular response to ethylene stimulus BP
## 3094 77 cellular response to iron ion BP
## 254 138 glycolytic process BP
## over_represented_padjust
## 3164 0.002824047
## 3114 0.002824047
## 3094 0.009138897
## 254 0.009138897
# convert list to data.frame
temp %>% enframe(name="cluster") %>% unnest(value) %>% write_csv(path="../output/twoafternoon.trtsoil.DEG.Kmeans.cluster.csv")
# 2_afternoon DEG expression data (scaled)
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG<-cpm.timecourse.v3.0.scale %>% dplyr::select(-cv) %>%
inner_join(twoafternoon.any.trtlive.DEGs.all.v3.0anno %>% filter(FDR<0.05) %>% dplyr::select(genes),by=c(transcript_ID="genes")) %>%
gather(sample,value,-1) %>% inner_join(sample.description.timecourse,by="sample") %>% filter(sampling_time=="2_afternoon")
## Warning: Column `transcript_ID`/`genes` joining factor and character vector,
## coercing into character vector
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# spread
cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread<-cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG %>% dplyr::select(transcript_ID,sample,value) %>% spread(sample,value,-1)
dim(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread) # [1] 2178 97
## [1] 2178 97
# calculate wss
wss <- (nrow(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1])-1)*sum(apply(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1],2,var))
for (i in 2:20) wss[i] <- sum(kmeans(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1],
centers=i,iter.max = 10)$withinss) # If default iter.max=10 gave me "did not converge in 10 iterations" error. Solution: https://r.789695.n4.nabble.com/kmeans-quot-did-not-converge-in-10-iterations-quot-td797019.html.
plot(1:20, wss, type="b", xlab="Number of Clusters", ylab="Within groups sum of squares")
Let’s perform the actual clsutering using K=8:
set.seed(20)
kClust.any.trtlive.8 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1], centers=8, nstart = 1000, iter.max = 20)
kClusters.any.trtlive.8 <- kClust.any.trtlive.8$cluster
# number of clusters
cluster.any.trtlive.8.num<-tibble(cluster=kClusters.any.trtlive.8) %>% group_by(cluster) %>% summarize(n=n())
cluster.any.trtlive.8.num$cluster<-as.character(cluster.any.trtlive.8.num$cluster) # classic way
cluster.any.trtlive.8.num
Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i
clust.centroid = function(i, dat, clusters) {
ind = (clusters == i)
colMeans(dat[ind,])
}
kClustcentroids.any.trtlive.8 <- sapply(levels(factor(kClusters.any.trtlive.8)), clust.centroid, cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1], kClusters.any.trtlive.8)
kClustcentroids.any.trtlive.8
## 1 2 3 4
## 1a1_q_002_S1_R1_001 0.667743321 -0.32765717 -0.046643192 -0.39677662
## 1a3_q_004_S3_R1_001 -0.164654334 -0.35598558 -0.231303948 -0.64957240
## 1a7_q_007_d8_S7_R1_001 -0.099544549 0.07985282 -0.050667391 -0.23201923
## 1a8_q_008_d8_S8_R1_001 0.297267619 -0.41141910 -0.162422020 -0.61640584
## 1c6_q_028_S22_R1_001 0.056987377 -0.44714663 -0.126738544 -0.46592398
## 1d3_q_038_S27_R1_001 -0.064943110 -0.01126279 -0.135680172 -0.46607100
## 1d5_q_042_S29_R1_001 0.200238193 -0.29080793 -0.118800923 -0.38894467
## 1e4_q_055_S36_R1_001 0.003589494 -0.29876284 -0.122716897 -0.42267062
## 1e5_q_056_S37_R1_001 0.726417562 1.43425878 -0.021857251 1.25689847
## 1e6_q_057_S38_R1_001 0.995977726 0.26252132 -0.001403176 0.65860678
## 1e8_q_059_S40_R1_001 0.112858377 -0.18738002 -0.167528572 -0.33182326
## 1f1_q_060_S41_R1_001 0.580798020 -0.44206099 -0.119842108 -0.48519518
## 1f2_q_062_S42_R1_001 1.390453319 -0.50086543 -0.106813122 -0.61309121
## 1f3_q_065_S43_R1_001 0.309852659 -0.35553902 -0.164473983 -0.70604376
## 1f4_q_066_S44_R1_001 -0.275865455 0.84091426 -0.041472560 0.10389319
## 1f5_q_068_S45_R1_001 0.218063039 1.13773485 -0.035367142 0.76190021
## 1g3_q_075_S51_R1_001 0.170652760 -0.49476679 -0.254659555 -0.86152704
## 1g7_q_082_S55_R1_001 0.989460365 0.04695988 0.067401377 0.36856141
## 1h3_q_086_d8_S59_R1_001 -0.049775972 -0.38980204 -0.163234526 -0.72735124
## 1h4_q_090_d8_S60_R1_001 0.177758325 -0.26796944 -0.192104972 -0.59821603
## 1i2_q_104_S66_R1_001 0.448865079 1.41475065 -0.031466952 1.20248943
## 1i3_q_105_S67_R1_001 -0.051509530 -0.53810160 -0.187382050 -0.85025716
## 1i7_q_110_S71_R1_001 0.068749205 -0.49607172 -0.197040461 -0.60838606
## 1j3_q_114_S75_R1_001 0.403202781 -0.32776759 -0.142145451 -0.38805290
## 1j4_q_115_S76_R1_001 0.432092698 -0.40115361 -0.128720389 -0.68790645
## 1j6_q_117_S78_R1_001 1.446221301 -0.35008313 -0.174803857 -0.35626757
## 1j7_q_118_S79_R1_001 -0.028721586 -0.09942786 -0.121350305 -0.45278572
## 1k2_q_121_S82_R1_001 0.156892451 -0.21364902 -0.163163385 -0.16031310
## 1k4_q_127_S84_R1_001 -0.124611222 0.15915694 -0.027022813 -0.17581703
## 1k5_q_128_S85_R1_001 -0.042266884 -0.32432645 -0.124890153 -0.43393167
## 1k6_q_130_S86_R1_001 -0.205451877 -0.26762999 -0.135008473 -0.73030065
## 1k8_q_135_S88_R1_001 1.438110763 -0.57711139 -0.131422307 -0.60401603
## 2a1_q_146_S97_R1_001 -0.290204703 1.50729788 0.033306735 0.66315456
## 2a7_q_153_S103_R1_001 0.121751075 -0.14219194 -0.161296532 -0.22383676
## 2b1_q_156_S105_R1_001 -0.370461684 0.21563344 -0.079958116 -0.38952862
## 2b2_q_158_S106_R1_001 -0.092906871 -0.33983305 -0.129935262 -0.61728673
## 2b3_q_160_S107_R1_001 0.801826670 -0.07853962 -0.063934905 -0.32388388
## 2b7_q_165_S111_R1_001 1.941264814 0.04399119 0.014610245 0.47847149
## 2b8_q_167_S112_R1_001 -0.439497792 0.31449571 -0.124601004 -0.31894277
## 2c1_q_168_S113_R1_001 -0.332622316 -0.37936089 -0.271802644 -0.96239754
## 2c3_q_171_S115_R1_001 0.747974239 0.26976923 -0.147044784 0.83162456
## 2c4_q_172_d8_S116_R1_001 -0.177353848 -0.42401180 -0.269084210 -0.92547133
## 2c5_q_173_S117_R1_001 -0.153468985 -0.21201987 -0.128152266 -0.56160077
## 2c7_q_178_S119_R1_001 0.490658837 -0.13476241 -0.113390560 -0.51540415
## 2d2_q_181_S122_R1_001 -0.038406305 -0.34746080 -0.226491607 -0.92326944
## 2d7_q_189_S127_R1_001 0.076776506 -0.38918324 -0.195755268 -0.59211153
## 2e6_q_200_d8_S134_R1_001 0.148508712 -0.47631926 -0.196953155 -0.93980129
## 2f6_q_212_S142_R1_001 -0.686284077 0.74811782 -0.055292537 -0.18810402
## 2g1_q_217_S145_R1_001 0.093629703 0.92356181 -0.081688975 0.70480619
## 2g4_q_221_S148_R1_001 0.514267158 -0.05354395 -0.122223428 0.13477216
## 2g8_q_229_S152_R1_001 -0.085726859 -0.58175229 -0.241013995 -1.00232529
## 2h1_q_230_S153_R1_001 0.594540848 1.42283734 0.074523033 0.92677550
## 2h4_q_235_S156_R1_001 0.047703914 -0.54772186 -0.230598200 -0.82989457
## 2h7_q_238_S159_R1_001 -0.086813792 -0.36230319 -0.150059576 -0.56791485
## 2i1_q_242_S161_R1_001 1.824516917 -0.50240460 -0.161926754 -0.15096296
## 2i4_q_246_S164_R1_001 0.086726430 -0.27646481 -0.114508669 -0.63570881
## 2i5_q_247_S165_R1_001 0.687983021 -0.48426977 -0.165921004 -0.63404337
## 2i6_q_247_d8_S166_R1_001 -0.005610120 -0.40071256 -0.160537632 -0.75763689
## 2j1_q_250_S169_R1_001 -0.163029116 0.41207603 -0.073569058 -0.05144553
## 2j4_q_254_S172_R1_001 1.939829155 -0.46710567 -0.123599693 -0.16694577
## 2j6_q_257_S174_R1_001 0.339197640 -0.30685656 -0.202345914 -0.84711496
## 2j7_q_259_d8_S175_R1_001 -0.170478161 0.11700872 -0.059573400 -0.25999939
## 2k4_q_267_S180_R1_001 -0.043621197 -0.03910844 -0.095527983 -0.20542526
## 2l4_q_282_S188_R1_001 -0.508317144 1.57909885 -0.015517867 0.29050305
## 3a1_q_289_S193_R1_001 0.360316641 -0.09362945 -0.183379817 0.09466893
## 3a4_q_294_S196_R1_001 1.654676569 -0.38124590 -0.096435809 -0.33989249
## 3b5_q_305_S205_R1_001 -0.399262440 0.12774387 -0.163787765 -0.49305472
## 3c2_q_312_S210_R1_001 0.304620799 -0.42498325 -0.179310235 -0.44503651
## 3c5_q_317_S213_R1_001 1.890696314 0.13603595 -0.032988050 0.50388257
## 3c7_q_322_S215_R1_001 0.379544777 1.01519691 -0.092914085 1.08195234
## 3d2_q_325_S218_R1_001 -0.082659502 -0.52454544 -0.199749838 -0.92002753
## 3d6_q_333_S222_R1_001 -0.068865617 -0.45913790 -0.206269355 -0.67135111
## 3e2_q_342_S226_R1_001 0.137660317 -0.01778525 -0.125044996 -0.21837645
## 3e3_q_343_S227_R1_001 0.228599212 0.85867987 -0.023059172 0.46679634
## 3e5_q_348_S229_R1_001 0.110151868 -0.39202581 -0.168827285 -0.47746082
## 3f2_q_353_d8_S234_R1_001 0.580631531 -0.31787248 -0.131266972 -0.33323815
## 3f4_q_356_S236_R1_001 0.241684607 -0.38426428 -0.174436203 -0.83738845
## 3f5_q_358_S237_R1_001 0.055736388 -0.48907374 -0.161154767 -0.69096928
## 3f7_q_360_S239_R1_001 1.240685536 -0.51115748 -0.166456941 -0.37523025
## 3f8_q_360_d8_S240_R1_001 0.206508248 -0.54884151 -0.231965593 -1.04556939
## 3h1_q_372_S249_R1_001 -0.321931940 -0.20062876 -0.145521677 -0.49078705
## 3h2_q_374_S250_R1_001 -0.077989616 -0.40957242 -0.199900886 -1.00888379
## 3h3_q_375_d8_S251_R1_001 0.023918597 -0.50825597 -0.252555408 -1.06700296
## 3h5_q_377_S253_R1_001 0.206624602 0.93161082 -0.113117780 0.95781878
## 3h8_q_387_d8_S256_R1_001 -0.520567487 0.57433447 -0.073081784 -0.17028803
## 3i3_q_391_S259_R1_001 0.466877113 1.21686116 0.002709982 0.97244657
## 3i4_q_392_S260_R1_001 -0.437682578 0.21600207 -0.092443264 -0.39239927
## 3j4_q_403_S268_R1_001 0.127935282 -0.31015358 -0.113554006 -0.61933737
## 3k1_q_411_S273_R1_001 -0.531958895 1.74341114 0.057241491 0.43232569
## 3k6_q_417_S278_R1_001 0.096255676 -0.54533667 -0.194263773 -0.88040526
## 3k7_q_419_S279_R1_001 1.410969598 -0.50565904 -0.096625964 -0.36010574
## 3k8_q_420_S280_R1_001 -0.147982771 -0.32632497 -0.125488478 -0.51739957
## 3l1_q_421_S281_R1_001 -0.378719653 -0.07379489 -0.231018739 -0.45002638
## 3l4_q_426_S284_R1_001 0.885380743 -0.33277685 -0.115217251 -0.41233264
## 3l6_q_428_S286_R1_001 -0.414310189 -0.45589789 -0.177417551 -0.90216484
## 3l8_q_432_S288_R1_001 -0.436179690 -0.47260761 -0.209829063 -0.87984537
## 5 6 7 8
## 1a1_q_002_S1_R1_001 -0.153958174 -0.924921160 -0.354781005 -0.584426385
## 1a3_q_004_S3_R1_001 -0.274268070 -0.447217126 0.589512528 0.598921297
## 1a7_q_007_d8_S7_R1_001 -0.018767203 -0.383198263 0.723989576 0.051023263
## 1a8_q_008_d8_S8_R1_001 -0.283301202 -0.447288310 0.556428670 -0.032406247
## 1c6_q_028_S22_R1_001 -0.116324182 0.359480786 -0.040932142 0.001411183
## 1d3_q_038_S27_R1_001 -0.018124208 -0.272091794 0.518926045 -0.120575363
## 1d5_q_042_S29_R1_001 -0.004841077 0.222942498 0.094901649 0.322552795
## 1e4_q_055_S36_R1_001 -0.023384604 0.686235337 0.304362191 0.201468554
## 1e5_q_056_S37_R1_001 3.001108103 2.794726489 -0.609160267 1.254886850
## 1e6_q_057_S38_R1_001 1.278834477 0.515986324 -0.565823895 0.673239009
## 1e8_q_059_S40_R1_001 0.042664972 0.148954386 0.371113939 0.840546796
## 1f1_q_060_S41_R1_001 -0.073907423 0.198346493 -0.163200857 0.225007773
## 1f2_q_062_S42_R1_001 0.279644263 -0.773724225 -0.779136809 -0.556446579
## 1f3_q_065_S43_R1_001 -0.138578664 -0.148853275 0.222174334 -0.252479954
## 1f4_q_066_S44_R1_001 0.742262686 -0.087123478 -0.093545206 -0.128993798
## 1f5_q_068_S45_R1_001 1.598916046 0.840402298 -0.225206345 0.470472125
## 1g3_q_075_S51_R1_001 -0.463685522 -0.847881944 0.540982295 -0.182646702
## 1g7_q_082_S55_R1_001 0.752992714 -0.774934794 -0.451953573 -0.112733338
## 1h3_q_086_d8_S59_R1_001 -0.501378182 -0.510109918 0.657516090 -0.328068192
## 1h4_q_090_d8_S60_R1_001 -0.121186940 -0.562001832 0.262405135 -0.132406627
## 1i2_q_104_S66_R1_001 2.808293432 1.329847936 -0.724093805 1.210904204
## 1i3_q_105_S67_R1_001 -0.423748128 -0.226203882 0.168668688 -0.076734949
## 1i7_q_110_S71_R1_001 -0.296350739 -0.238351823 -0.007081755 0.343755903
## 1j3_q_114_S75_R1_001 -0.021813427 -0.243916463 0.229504937 0.075125509
## 1j4_q_115_S76_R1_001 -0.196616237 -0.890961600 -0.196397041 -0.659117803
## 1j6_q_117_S78_R1_001 0.549444253 -0.325991782 -0.734857987 -0.115204243
## 1j7_q_118_S79_R1_001 0.037219558 -0.260465150 0.534407772 -0.053151031
## 1k2_q_121_S82_R1_001 0.199362890 -0.364045001 0.301727533 1.351445206
## 1k4_q_127_S84_R1_001 0.173924348 -0.406927751 0.072079474 -0.473781730
## 1k5_q_128_S85_R1_001 -0.215753164 -0.072101111 0.259760791 -0.062410241
## 1k6_q_130_S86_R1_001 -0.460316168 -0.400289996 0.389949688 -0.616419181
## 1k8_q_135_S88_R1_001 0.018683019 -0.904276921 -0.969458968 -0.723832177
## 2a1_q_146_S97_R1_001 1.548552328 1.536690333 -0.178393303 0.054346438
## 2a7_q_153_S103_R1_001 0.147745237 -0.438582913 0.683578621 0.459811399
## 2b1_q_156_S105_R1_001 0.020198161 0.618078195 0.182930022 0.944217317
## 2b2_q_158_S106_R1_001 -0.306414885 1.526392867 0.082234743 -0.018226581
## 2b3_q_160_S107_R1_001 0.390901598 -0.813561405 -0.565222051 -0.706699354
## 2b7_q_165_S111_R1_001 1.248019890 0.126337281 -0.934373530 -0.121122876
## 2b8_q_167_S112_R1_001 0.091921143 -0.156786025 0.969397006 -0.004710857
## 2c1_q_168_S113_R1_001 -0.446945531 -0.166032194 0.545028379 -0.348697158
## 2c3_q_171_S115_R1_001 1.583656713 -0.525062113 -0.639846863 0.753543137
## 2c4_q_172_d8_S116_R1_001 -0.355533240 -0.399971858 0.284564945 -0.227382478
## 2c5_q_173_S117_R1_001 -0.259914785 0.553831260 0.601120414 -0.148442431
## 2c7_q_178_S119_R1_001 0.135140823 -0.274655475 -0.441467600 -0.335061033
## 2d2_q_181_S122_R1_001 -0.412616594 -0.247284763 0.424106842 -0.555798480
## 2d7_q_189_S127_R1_001 -0.120166308 -0.007066215 0.501009353 0.597616838
## 2e6_q_200_d8_S134_R1_001 -0.573391211 -0.183495507 0.360228667 -0.428652845
## 2f6_q_212_S142_R1_001 0.430736702 -0.091817910 0.021102376 -0.352773200
## 2g1_q_217_S145_R1_001 1.474721140 -0.025911511 -0.341830399 0.304520478
## 2g4_q_221_S148_R1_001 0.647741677 0.176186584 0.036993965 1.910217724
## 2g8_q_229_S152_R1_001 -0.621887877 -0.463208823 0.194081547 -0.284880156
## 2h1_q_230_S153_R1_001 2.039957165 0.397077126 -0.589539442 -0.006630842
## 2h4_q_235_S156_R1_001 -0.504348358 -0.848503900 0.255414392 -0.148846897
## 2h7_q_238_S159_R1_001 -0.326914006 0.376117636 0.402644254 -0.140112370
## 2i1_q_242_S161_R1_001 0.434038292 -0.674249074 -0.747811958 0.007318162
## 2i4_q_246_S164_R1_001 -0.125010568 -0.278151201 0.240550082 -0.415217148
## 2i5_q_247_S165_R1_001 -0.381462914 -0.943061008 -0.352788461 -0.743383002
## 2i6_q_247_d8_S166_R1_001 -0.464964224 -0.634031044 0.223624265 -0.544913192
## 2j1_q_250_S169_R1_001 0.325284292 -0.169819425 0.681030330 -0.036504180
## 2j4_q_254_S172_R1_001 0.476705107 -0.847424041 -0.743644544 0.114926792
## 2j6_q_257_S174_R1_001 -0.218644633 -0.038083229 0.496308158 -0.431267644
## 2j7_q_259_d8_S175_R1_001 0.003665751 -0.030250939 0.549140225 -0.006946679
## 2k4_q_267_S180_R1_001 0.066252816 0.053435080 0.480857646 0.182634435
## 2l4_q_282_S188_R1_001 1.030555163 0.550425037 0.226764174 0.026764177
## 3a1_q_289_S193_R1_001 0.590364587 -0.417740150 0.262911604 2.570998452
## 3a4_q_294_S196_R1_001 0.366466737 -0.745802208 -0.666366936 -0.369921572
## 3b5_q_305_S205_R1_001 0.009970362 -0.112518855 0.814923500 -0.195784932
## 3c2_q_312_S210_R1_001 0.025100602 0.067827568 0.030897585 0.307255124
## 3c5_q_317_S213_R1_001 1.285242150 -0.434157916 -0.779605134 1.658738552
## 3c7_q_322_S215_R1_001 1.980884807 0.518454435 -0.276534431 1.163743299
## 3d2_q_325_S218_R1_001 -0.486130552 -0.083259435 0.300327702 -0.320047010
## 3d6_q_333_S222_R1_001 -0.275737319 0.380188979 0.232464582 0.853361004
## 3e2_q_342_S226_R1_001 0.221647875 -0.475727232 0.048413963 -0.132021913
## 3e3_q_343_S227_R1_001 1.313905172 0.048997273 -0.084524050 0.032460640
## 3e5_q_348_S229_R1_001 -0.136737036 0.025293329 -0.038963906 0.844919361
## 3f2_q_353_d8_S234_R1_001 -0.009009084 -0.424745025 0.396295713 -0.086210712
## 3f4_q_356_S236_R1_001 -0.296676268 -0.417604347 0.459312608 -0.665399869
## 3f5_q_358_S237_R1_001 -0.367387419 0.019020122 0.140396871 -0.282452074
## 3f7_q_360_S239_R1_001 0.232677999 -0.708896832 -0.788506091 -0.288040976
## 3f8_q_360_d8_S240_R1_001 -0.502458950 -0.045472372 -0.131730152 -0.433810292
## 3h1_q_372_S249_R1_001 -0.014092527 0.810541589 0.153714650 0.171429882
## 3h2_q_374_S250_R1_001 -0.420333546 -0.157066159 0.481945469 -0.500680953
## 3h3_q_375_d8_S251_R1_001 -0.626610929 -0.560543898 0.597881359 -0.532116758
## 3h5_q_377_S253_R1_001 1.897990604 0.558950444 -0.326640266 0.643963226
## 3h8_q_387_d8_S256_R1_001 0.144839064 -0.064330227 0.749218292 -0.086378266
## 3i3_q_391_S259_R1_001 2.298543000 0.285996895 -0.568169910 0.008373999
## 3i4_q_392_S260_R1_001 0.323715774 -0.242494345 0.459354969 0.147114346
## 3j4_q_403_S268_R1_001 -0.256017453 -0.912205830 -0.321592838 -0.824180372
## 3k1_q_411_S273_R1_001 1.113510333 0.569946172 0.153459978 -0.143470389
## 3k6_q_417_S278_R1_001 -0.452979794 0.226447792 0.182690271 -0.403184195
## 3k7_q_419_S279_R1_001 0.212632376 -0.760135990 -0.848613902 -0.546614988
## 3k8_q_420_S280_R1_001 -0.241773636 -0.050092535 0.430185733 0.092280541
## 3l1_q_421_S281_R1_001 0.185762457 0.165484548 0.344826889 0.400341980
## 3l4_q_426_S284_R1_001 -0.048090780 -1.117707933 -0.555011886 -0.821729061
## 3l6_q_428_S286_R1_001 -0.567873511 1.405126201 -0.096143189 -0.293971862
## 3l8_q_432_S288_R1_001 -0.478421532 0.085402829 0.308283750 -0.373295394
library(glue)
# adding sample description to data
data.sample<-kClustcentroids.any.trtlive.8 %>% as_tibble(rownames="sample") %>%
gather(cluster,value,-1) %>%
inner_join(sample.description.timecourse,by="sample") %>%
inner_join(cluster.any.trtlive.8.num,by="cluster") %>%
mutate(cluster.n=glue('{cluster2} \n({n2})',
cluster2=cluster,
n2=n) )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean
data.group<-data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% group_by(group.cluster) %>% summarize(group.cluster.mean=mean(value)) %>% inner_join(
data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% dplyr::select("group.cluster","sampling_day","soil_trt","cluster.n","cluster"),by="group.cluster") %>% dplyr::slice(rep(1:768)[!duplicated(.$group.cluster)])
# plot
p8.any.trtlive<- ggplot(data.sample,aes(x=soil_trt,y=value, group=cluster, colour=as.factor(cluster))) +
geom_point() + geom_hline(yintercept=0,color="red") +
geom_line(data=data.group,aes(x=soil_trt,y=group.cluster.mean)) +
facet_grid(cluster.n~sampling_day) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): eight clusters",color = "Cluster",y="scaled expression level")
p8.any.trtlive
ggsave(p8.any.trtlive,file="../output/Twoafternoon.any.trtlive.DEG.Kmean.8clusters.png",width=11,height=8)
under construction
Let’s perform the actual clsutering using K=15:
set.seed(20)
kClust.any.trtlive.15 <- kmeans(cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1], centers=15, nstart = 1000, iter.max = 20)
kClusters.any.trtlive.15 <- kClust.any.trtlive.15$cluster
# number of clusters
cluster.any.trtlive.15.num<-tibble(cluster=kClusters.any.trtlive.15) %>% group_by(cluster) %>% summarize(n=n())
cluster.any.trtlive.15.num$cluster<-as.character(cluster.any.trtlive.15.num$cluster) # classic way
cluster.any.trtlive.15.num
Now we can calculate the cluster ‘cores’ aka centroids: # function to find centroid in cluster i
clust.centroid = function(i, dat, clusters) {
ind = (clusters == i)
colMeans(dat[ind,])
}
kClustcentroids.any.trtlive.15 <- sapply(levels(factor(kClusters.any.trtlive.15)), clust.centroid, cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread[,-1], kClusters.any.trtlive.15)
kClustcentroids.any.trtlive.15
## 1 2 3 4
## 1a1_q_002_S1_R1_001 0.53634127 -0.368342751 -0.19308652 -0.994353765
## 1a3_q_004_S3_R1_001 -0.72943329 -0.364642600 -0.73384747 -0.449043022
## 1a7_q_007_d8_S7_R1_001 -0.34271346 -0.002591793 -0.18433094 -0.412446658
## 1a8_q_008_d8_S8_R1_001 -0.25098727 -0.479632975 -0.58712815 -0.436360551
## 1c6_q_028_S22_R1_001 -0.20647922 -0.519653925 -0.47851481 0.424046487
## 1d3_q_038_S27_R1_001 -0.45237747 -0.096021258 -0.33381386 -0.210028815
## 1d5_q_042_S29_R1_001 -0.37921457 -0.316640471 -0.48207801 0.213537259
## 1e4_q_055_S36_R1_001 -0.33018383 -0.352610696 -0.48854598 0.786627982
## 1e5_q_056_S37_R1_001 -0.18964695 2.326526590 1.31284361 3.664484291
## 1e6_q_057_S38_R1_001 0.35748840 0.439742320 0.61379835 0.811489161
## 1e8_q_059_S40_R1_001 -0.57344042 -0.205920280 -0.47119015 0.180898074
## 1f1_q_060_S41_R1_001 -0.14922359 -0.472678895 -0.55313292 0.236231842
## 1f2_q_062_S42_R1_001 0.63651588 -0.509246429 -0.31707501 -0.661529654
## 1f3_q_065_S43_R1_001 -0.23110446 -0.404863815 -0.57974285 -0.066022718
## 1f4_q_066_S44_R1_001 -0.35278525 0.962937779 0.43872224 0.021524021
## 1f5_q_068_S45_R1_001 -0.25588031 1.378049475 1.12313746 1.210274117
## 1g3_q_075_S51_R1_001 -0.44792348 -0.568292653 -0.82905325 -0.890478254
## 1g7_q_082_S55_R1_001 0.72633495 -0.080637410 0.73228387 -0.795123279
## 1h3_q_086_d8_S59_R1_001 -0.29920071 -0.513618704 -0.65232001 -0.527459386
## 1h4_q_090_d8_S60_R1_001 -0.24548146 -0.353938969 -0.50771555 -0.610790228
## 1i2_q_104_S66_R1_001 -0.30096486 2.224491882 1.35764269 1.734753262
## 1i3_q_105_S67_R1_001 -0.46617942 -0.582322052 -0.87902057 -0.221836381
## 1i7_q_110_S71_R1_001 -0.46126166 -0.557292215 -0.70614487 -0.249459795
## 1j3_q_114_S75_R1_001 -0.21487978 -0.353694579 -0.38865178 -0.267270835
## 1j4_q_115_S76_R1_001 0.16895152 -0.424543497 -0.48561996 -0.877063575
## 1j6_q_117_S78_R1_001 0.52681286 -0.289151902 -0.18113877 -0.176544625
## 1j7_q_118_S79_R1_001 -0.37906708 -0.183318695 -0.24738837 -0.145767362
## 1k2_q_121_S82_R1_001 -0.43674604 -0.230598214 -0.30521423 -0.442600889
## 1k4_q_127_S84_R1_001 -0.07200795 0.090102170 0.08678698 -0.376519392
## 1k5_q_128_S85_R1_001 -0.33020048 -0.359593501 -0.46034696 -0.076297079
## 1k6_q_130_S86_R1_001 -0.43061924 -0.398830427 -0.53666735 -0.371111514
## 1k8_q_135_S88_R1_001 0.83710513 -0.631865537 -0.39409446 -0.893247336
## 2a1_q_146_S97_R1_001 -0.49258121 1.995394581 0.93507191 1.953704208
## 2a7_q_153_S103_R1_001 -0.49454133 -0.100675057 -0.26495917 -0.488492153
## 2b1_q_156_S105_R1_001 -0.83180307 0.225015925 -0.53444092 0.573982476
## 2b2_q_158_S106_R1_001 -0.41535292 -0.425755847 -0.74919915 1.606934793
## 2b3_q_160_S107_R1_001 0.46237604 -0.069628419 0.02848632 -0.744101263
## 2b7_q_165_S111_R1_001 1.31756665 0.105270613 0.77115818 0.350984464
## 2b8_q_167_S112_R1_001 -0.72244868 0.264335569 -0.15019604 -0.167280601
## 2c1_q_168_S113_R1_001 -0.82376851 -0.396974197 -0.93171407 -0.193799398
## 2c3_q_171_S115_R1_001 -0.03412986 0.416163950 0.92555485 -0.521674029
## 2c4_q_172_d8_S116_R1_001 -0.72379391 -0.505848865 -0.86466264 -0.433987397
## 2c5_q_173_S117_R1_001 -0.39482248 -0.272782635 -0.59216999 0.490317993
## 2c7_q_178_S119_R1_001 -0.03883303 -0.157560964 -0.28476500 -0.216851306
## 2d2_q_181_S122_R1_001 -0.42054894 -0.467867747 -0.78208875 -0.181534715
## 2d7_q_189_S127_R1_001 -0.41105157 -0.418810528 -0.69233636 -0.011829492
## 2e6_q_200_d8_S134_R1_001 -0.41698501 -0.590588987 -0.91296499 -0.180346610
## 2f6_q_212_S142_R1_001 -0.76474963 0.843697180 0.08605002 0.011474899
## 2g1_q_217_S145_R1_001 -0.42226502 1.182073561 1.08383065 0.082266136
## 2g4_q_221_S148_R1_001 -0.31375669 -0.053062100 -0.02535975 0.162501139
## 2g8_q_229_S152_R1_001 -0.50942934 -0.658486487 -1.02230595 -0.518342486
## 2h1_q_230_S153_R1_001 0.17784394 1.754882779 1.61959457 0.562339030
## 2h4_q_235_S156_R1_001 -0.55138662 -0.646602378 -0.84413910 -0.903795479
## 2h7_q_238_S159_R1_001 -0.33843179 -0.444108789 -0.64674140 0.343929510
## 2i1_q_242_S161_R1_001 0.85864638 -0.520379233 -0.05262601 -0.615346016
## 2i4_q_246_S164_R1_001 -0.35252458 -0.387697794 -0.53293408 -0.291039810
## 2i5_q_247_S165_R1_001 0.33955450 -0.594906691 -0.39929108 -0.972466316
## 2i6_q_247_d8_S166_R1_001 -0.31101400 -0.545855723 -0.68347298 -0.689561140
## 2j1_q_250_S169_R1_001 -0.43318644 0.377964052 0.15656346 -0.121326028
## 2j4_q_254_S172_R1_001 0.86305001 -0.499530146 0.01910866 -0.838443237
## 2j6_q_257_S174_R1_001 -0.40616922 -0.375301396 -0.72059988 0.097759975
## 2j7_q_259_d8_S175_R1_001 -0.43558680 0.113928380 -0.20772906 -0.132822705
## 2k4_q_267_S180_R1_001 -0.43105037 -0.047631737 -0.22701649 0.008246768
## 2l4_q_282_S188_R1_001 -0.63315160 2.155665841 0.59732802 0.742891468
## 3a1_q_289_S193_R1_001 -0.41190937 -0.077217908 -0.07948568 -0.455978853
## 3a4_q_294_S196_R1_001 0.78398092 -0.437152810 -0.11872224 -0.767081698
## 3b5_q_305_S205_R1_001 -0.72196113 0.007554792 -0.35543867 -0.089257169
## 3c2_q_312_S210_R1_001 -0.33162499 -0.472352366 -0.55226939 0.023396453
## 3c5_q_317_S213_R1_001 0.57641327 0.348782501 0.44848551 -0.313256112
## 3c7_q_322_S215_R1_001 -0.39624555 1.305823671 1.23561479 0.731686825
## 3d2_q_325_S218_R1_001 -0.54763350 -0.604152593 -0.91656643 -0.126705755
## 3d6_q_333_S222_R1_001 -0.56314684 -0.517024465 -0.83779909 0.428910295
## 3e2_q_342_S226_R1_001 -0.26427365 -0.113558620 0.01849752 -0.403840155
## 3e3_q_343_S227_R1_001 -0.27599098 1.019333996 0.97316719 0.242189942
## 3e5_q_348_S229_R1_001 -0.43796289 -0.380070760 -0.64120742 -0.065094411
## 3f2_q_353_d8_S234_R1_001 0.02692133 -0.390669979 -0.29049338 -0.527010247
## 3f4_q_356_S236_R1_001 -0.22978766 -0.434507970 -0.67128439 -0.359061548
## 3f5_q_358_S237_R1_001 -0.38407562 -0.565993884 -0.68246776 -0.041817183
## 3f7_q_360_S239_R1_001 0.47297397 -0.496165576 -0.24549460 -0.681585815
## 3f8_q_360_d8_S240_R1_001 -0.57562925 -0.620894611 -1.04225114 -0.077580335
## 3h1_q_372_S249_R1_001 -0.59853361 -0.218548574 -0.56850185 0.831337469
## 3h2_q_374_S250_R1_001 -0.48519100 -0.493134961 -0.90250285 -0.088557852
## 3h3_q_375_d8_S251_R1_001 -0.52797275 -0.611792527 -0.98082736 -0.589994111
## 3h5_q_377_S253_R1_001 -0.33684769 1.487058308 1.11120500 0.773878891
## 3h8_q_387_d8_S256_R1_001 -0.68604048 0.613742015 -0.01976978 -0.205550669
## 3i3_q_391_S259_R1_001 0.05820257 1.550274445 1.64463447 0.479510876
## 3i4_q_392_S260_R1_001 -0.72802460 0.249221374 -0.25598967 -0.298001019
## 3j4_q_403_S268_R1_001 0.12505811 -0.382590734 -0.34636371 -0.879887089
## 3k1_q_411_S273_R1_001 -0.58494815 2.351175657 0.72105754 0.775057112
## 3k6_q_417_S278_R1_001 -0.32989649 -0.634456188 -0.86954787 0.330504035
## 3k7_q_419_S279_R1_001 0.80181082 -0.521094320 -0.20318419 -0.732645594
## 3k8_q_420_S280_R1_001 -0.40191910 -0.347992086 -0.62256802 -0.167776164
## 3l1_q_421_S281_R1_001 -0.80233598 -0.063712225 -0.48436207 0.213300348
## 3l4_q_426_S284_R1_001 0.55970821 -0.421366161 -0.05877164 -1.172096416
## 3l6_q_428_S286_R1_001 -0.56160275 -0.465935417 -0.98668096 1.448146801
## 3l8_q_432_S288_R1_001 -0.60491821 -0.514935554 -0.88741547 0.044958636
## 5 6 7 8
## 1a1_q_002_S1_R1_001 0.539067262 0.677494289 -0.2143971100 -0.53950313
## 1a3_q_004_S3_R1_001 0.040619088 0.087791367 -0.3582435129 0.18574802
## 1a7_q_007_d8_S7_R1_001 0.404652920 -0.031086513 -0.1696025316 0.02199784
## 1a8_q_008_d8_S8_R1_001 0.400582886 0.358359368 -0.3610603150 0.14468767
## 1c6_q_028_S22_R1_001 -0.204006982 0.146627590 -0.1992854348 0.49896429
## 1d3_q_038_S27_R1_001 0.116043377 -0.009315873 -0.2560808754 -0.17461624
## 1d5_q_042_S29_R1_001 -0.149526915 0.439373327 -0.1656386329 0.42862123
## 1e4_q_055_S36_R1_001 0.038224369 0.100564248 -0.2286539037 0.63169311
## 1e5_q_056_S37_R1_001 -1.166943317 1.094709970 3.3885981462 -0.41410581
## 1e6_q_057_S38_R1_001 -0.576838712 1.277657687 1.7145476347 -0.38727769
## 1e8_q_059_S40_R1_001 -0.514146497 0.434065043 -0.0379737613 0.11106249
## 1f1_q_060_S41_R1_001 -0.179456731 0.842879312 -0.1518483882 0.53979832
## 1f2_q_062_S42_R1_001 0.636722011 1.547085329 -0.0563625467 -0.67806700
## 1f3_q_065_S43_R1_001 0.541615938 0.435108618 -0.3243500327 -0.08582989
## 1f4_q_066_S44_R1_001 0.101281733 -0.284385993 0.5134341137 -0.54993343
## 1f5_q_068_S45_R1_001 -0.737451460 0.449208744 1.6116234610 -0.61407054
## 1g3_q_075_S51_R1_001 0.398887174 0.381907270 -0.5295963426 -0.23540887
## 1g7_q_082_S55_R1_001 -0.290587913 1.082518078 0.4897295523 -0.69693499
## 1h3_q_086_d8_S59_R1_001 0.588383843 -0.024620396 -0.6454940950 -0.03703038
## 1h4_q_090_d8_S60_R1_001 -0.069585079 0.246217845 -0.2943928720 -0.16844009
## 1i2_q_104_S66_R1_001 -0.994658322 0.945545429 3.2722204713 -0.60872965
## 1i3_q_105_S67_R1_001 0.193382870 0.116934030 -0.5551025380 0.36900677
## 1i7_q_110_S71_R1_001 -0.215146858 0.294320759 -0.3941437161 0.09636140
## 1j3_q_114_S75_R1_001 0.113724542 0.675387520 -0.1559093732 0.13478917
## 1j4_q_115_S76_R1_001 1.371295711 0.391177718 -0.3539140389 -0.55810201
## 1j6_q_117_S78_R1_001 -0.018087061 1.899552830 0.4812962735 -0.58313485
## 1j7_q_118_S79_R1_001 0.398447111 0.022725698 -0.2083738408 -0.20355529
## 1k2_q_121_S82_R1_001 -0.561743095 0.454143394 0.1808581603 0.01105252
## 1k4_q_127_S84_R1_001 0.664380979 -0.244485916 -0.0405808734 -0.17140637
## 1k5_q_128_S85_R1_001 -0.032812063 0.095011268 -0.3007154943 0.38318763
## 1k6_q_130_S86_R1_001 1.157261715 -0.272860962 -0.5994254617 -0.15636388
## 1k8_q_135_S88_R1_001 0.215496211 1.599051388 -0.0431255889 -0.63605575
## 2a1_q_146_S97_R1_001 -0.189928258 -0.168414613 1.5414923023 -0.39197395
## 2a7_q_153_S103_R1_001 -0.343985568 0.420922001 0.0636925973 -0.05242950
## 2b1_q_156_S105_R1_001 0.010042769 -0.186432095 -0.1793395828 0.40173558
## 2b2_q_158_S106_R1_001 -0.120295194 0.053978239 -0.4569879346 0.72588318
## 2b3_q_160_S107_R1_001 0.671494280 0.786073999 0.1709272829 -0.62453585
## 2b7_q_165_S111_R1_001 -0.022319502 2.130054528 1.4027439987 -0.51657160
## 2b8_q_167_S112_R1_001 0.239840129 -0.356583165 -0.1643451768 -0.14477770
## 2c1_q_168_S113_R1_001 0.025999566 -0.182440550 -0.6684025652 0.36193943
## 2c3_q_171_S115_R1_001 -1.116136433 1.234948983 1.7670587467 -0.73326175
## 2c4_q_172_d8_S116_R1_001 -0.119292291 -0.005957709 -0.5402171943 -0.12156965
## 2c5_q_173_S117_R1_001 -0.121020988 -0.170402398 -0.4208972571 0.72028246
## 2c7_q_178_S119_R1_001 0.232869734 0.603297851 -0.0004349371 -0.35876488
## 2d2_q_181_S122_R1_001 0.827200131 -0.033394630 -0.6554408451 -0.15945277
## 2d7_q_189_S127_R1_001 0.010027508 0.211292804 -0.2572713126 0.28735894
## 2e6_q_200_d8_S134_R1_001 0.905203978 0.212931528 -0.6618989220 0.18594488
## 2f6_q_212_S142_R1_001 0.191965931 -0.645735143 0.2388280490 -0.26573640
## 2g1_q_217_S145_R1_001 -0.665440222 0.382282943 1.5592669882 -0.70549001
## 2g4_q_221_S148_R1_001 -0.617580643 0.881305005 0.5430941259 0.20265209
## 2g8_q_229_S152_R1_001 0.194382292 0.010841285 -0.7538151471 0.37799954
## 2h1_q_230_S153_R1_001 -0.339713651 0.722928688 1.8612743117 -0.52856003
## 2h4_q_235_S156_R1_001 0.217532549 0.253942768 -0.5714849028 -0.17463318
## 2h7_q_238_S159_R1_001 -0.005392722 -0.044121166 -0.4550573626 0.85848329
## 2i1_q_242_S161_R1_001 -0.087577491 2.172297235 0.5026312459 -0.55544775
## 2i4_q_246_S164_R1_001 0.481776312 0.164120777 -0.3786469392 0.11271458
## 2i5_q_247_S165_R1_001 0.591413625 0.796099359 -0.5654830811 -0.49802762
## 2i6_q_247_d8_S166_R1_001 0.419115318 0.121352879 -0.6368115704 0.04769499
## 2j1_q_250_S169_R1_001 0.266940037 -0.138808875 0.0852169509 -0.34757126
## 2j4_q_254_S172_R1_001 -0.247674196 2.356729690 0.3845039887 -0.53147570
## 2j6_q_257_S174_R1_001 0.616181740 0.451494655 -0.4330665746 -0.05939335
## 2j7_q_259_d8_S175_R1_001 0.181718026 -0.127443808 -0.0964415552 0.08629102
## 2k4_q_267_S180_R1_001 0.018271534 0.099985999 -0.0602999156 0.35072274
## 2l4_q_282_S188_R1_001 -0.018903608 -0.421113051 1.0788319131 -0.45586458
## 3a1_q_289_S193_R1_001 -0.748195930 0.721441433 0.4615100012 -0.22769141
## 3a4_q_294_S196_R1_001 0.195809731 1.988117850 0.3150733544 -0.51684804
## 3b5_q_305_S205_R1_001 0.721107835 -0.320865476 -0.2349890936 -0.28712353
## 3c2_q_312_S210_R1_001 -0.320220045 0.552212065 -0.1309726455 0.32984993
## 3c5_q_317_S213_R1_001 -0.279544954 2.330074137 1.3635770042 -0.57412300
## 3c7_q_322_S215_R1_001 -1.117137075 0.888030413 2.3605212474 -0.61736477
## 3d2_q_325_S218_R1_001 0.326818325 0.109099816 -0.6696734461 0.54478268
## 3d6_q_333_S222_R1_001 -0.186318607 0.181339785 -0.3041718360 0.36235176
## 3e2_q_342_S226_R1_001 0.324596926 0.297569987 -0.0304286400 -0.43430212
## 3e3_q_343_S227_R1_001 -0.215677112 0.483583365 1.1002692355 -0.51583860
## 3e5_q_348_S229_R1_001 -0.307546344 0.336794663 -0.2084058157 0.47090159
## 3f2_q_353_d8_S234_R1_001 0.012694555 0.782067317 -0.1536500855 -0.02116768
## 3f4_q_356_S236_R1_001 1.269137687 0.339233628 -0.4811330872 -0.22187063
## 3f5_q_358_S237_R1_001 0.375509971 0.169573528 -0.5430411589 0.33714631
## 3f7_q_360_S239_R1_001 0.283998280 1.520244716 0.1363744139 -0.54242632
## 3f8_q_360_d8_S240_R1_001 0.238835213 0.398441046 -0.6031815559 0.13342025
## 3h1_q_372_S249_R1_001 0.234848660 -0.249941417 -0.1299292919 0.46654298
## 3h2_q_374_S250_R1_001 0.998622336 -0.065742108 -0.6122293991 -0.20092775
## 3h3_q_375_d8_S251_R1_001 0.679232827 0.160034724 -0.7759473874 -0.12112787
## 3h5_q_377_S253_R1_001 -0.794322784 0.614481480 2.2312786111 -0.65460516
## 3h8_q_387_d8_S256_R1_001 0.160166582 -0.463193150 0.0531193210 0.01321435
## 3i3_q_391_S259_R1_001 -0.345391391 0.762403234 2.3500257727 -0.73101036
## 3i4_q_392_S260_R1_001 0.112416898 -0.251060874 0.1379385157 -0.38609698
## 3j4_q_403_S268_R1_001 1.008316393 0.078990218 -0.3432753436 -0.65163019
## 3k1_q_411_S273_R1_001 0.055231303 -0.385093836 1.2083381062 -0.42468951
## 3k6_q_417_S278_R1_001 0.312287405 0.268042088 -0.5810819626 0.27613925
## 3k7_q_419_S279_R1_001 0.448564517 1.641204732 0.2670149207 -0.49271413
## 3k8_q_420_S280_R1_001 0.308914699 -0.049097539 -0.2691750233 0.54461119
## 3l1_q_421_S281_R1_001 -0.358518811 -0.177368726 0.1294985411 -0.14216688
## 3l4_q_426_S284_R1_001 0.700467462 0.958371024 -0.2305557163 -0.74678793
## 3l6_q_428_S286_R1_001 0.130651957 -0.467641297 -0.6202664495 0.62938603
## 3l8_q_432_S288_R1_001 0.374263459 -0.423449758 -0.6194770877 0.26929507
## 9 10 11 12
## 1a1_q_002_S1_R1_001 -0.093282280 -5.176534e-02 -0.34883557 -0.56861725
## 1a3_q_004_S3_R1_001 -0.116079106 -1.247636e-01 0.94170519 0.65243451
## 1a7_q_007_d8_S7_R1_001 -0.037159747 1.462772e-01 -0.06335732 0.78312661
## 1a8_q_008_d8_S8_R1_001 -0.081197186 -1.466626e-01 0.10727360 0.49219358
## 1c6_q_028_S22_R1_001 -0.069078099 4.886612e-02 0.18259940 -0.27710161
## 1d3_q_038_S27_R1_001 -0.048745253 4.805125e-01 -0.19548676 0.70625374
## 1d5_q_042_S29_R1_001 -0.055987920 3.489170e-01 0.65116637 -0.05901000
## 1e4_q_055_S36_R1_001 -0.052759764 4.179516e-01 0.37093696 0.05516222
## 1e5_q_056_S37_R1_001 0.169004284 2.519392e+00 0.70828887 -0.32746835
## 1e6_q_057_S38_R1_001 0.052503586 5.905900e-01 0.62108269 -0.55602494
## 1e8_q_059_S40_R1_001 -0.031474785 5.515744e-02 1.08996863 0.49458140
## 1f1_q_060_S41_R1_001 -0.065746809 1.167396e-01 0.62615557 -0.54324202
## 1f2_q_062_S42_R1_001 -0.055717381 9.996102e-01 -0.35957937 -1.24154322
## 1f3_q_065_S43_R1_001 -0.086402746 2.539461e-01 -0.19184707 0.05739724
## 1f4_q_066_S44_R1_001 -0.007249898 1.200481e+00 -0.30782811 0.01419741
## 1f5_q_068_S45_R1_001 0.079288396 1.615407e+00 0.15195289 0.14481544
## 1g3_q_075_S51_R1_001 -0.151793393 -3.138769e-01 -0.02858004 0.64853393
## 1g7_q_082_S55_R1_001 0.044371834 1.145119e+00 -0.26891610 -0.40455248
## 1h3_q_086_d8_S59_R1_001 -0.124803185 -2.154913e-01 -0.23139238 0.68496147
## 1h4_q_090_d8_S60_R1_001 -0.102455314 3.308598e-01 -0.04576717 0.35472691
## 1i2_q_104_S66_R1_001 0.133690497 2.248935e+00 0.84169859 -0.52643463
## 1i3_q_105_S67_R1_001 -0.130658053 -1.482697e-01 0.22599946 -0.03739119
## 1i7_q_110_S71_R1_001 -0.083746561 -9.210903e-02 0.62034624 -0.09702181
## 1j3_q_114_S75_R1_001 -0.077501458 1.515651e-01 0.27231685 0.12428751
## 1j4_q_115_S76_R1_001 -0.133011108 2.020132e-01 -0.49680674 -0.60262090
## 1j6_q_117_S78_R1_001 -0.112935758 8.220863e-01 0.11376531 -1.02032081
## 1j7_q_118_S79_R1_001 -0.052598136 5.079476e-01 -0.08361925 0.66913647
## 1k2_q_121_S82_R1_001 -0.053606485 2.493880e-01 1.82498751 0.47561883
## 1k4_q_127_S84_R1_001 -0.077257959 7.125345e-01 -0.62147132 -0.08680889
## 1k5_q_128_S85_R1_001 -0.070930687 -1.359256e-01 0.14418892 0.04562564
## 1k6_q_130_S86_R1_001 -0.127907807 -1.448528e-01 -0.58628876 0.31914361
## 1k8_q_135_S88_R1_001 -0.124636749 2.706583e-01 -0.50408566 -1.34011699
## 2a1_q_146_S97_R1_001 0.098963253 1.840869e+00 -0.37214945 0.09847402
## 2a7_q_153_S103_R1_001 -0.060308288 2.373909e-01 0.29550810 1.08427959
## 2b1_q_156_S105_R1_001 0.020417496 5.165598e-01 1.34722427 0.10143625
## 2b2_q_158_S106_R1_001 -0.044175479 2.729903e-02 0.26637128 -0.14273100
## 2b3_q_160_S107_R1_001 -0.085407624 9.417926e-01 -0.63745644 -0.82364966
## 2b7_q_165_S111_R1_001 -0.004654114 1.024787e+00 -0.09374060 -1.19312629
## 2b8_q_167_S112_R1_001 -0.033905863 5.819147e-01 -0.16182604 1.39743279
## 2c1_q_168_S113_R1_001 -0.124475165 6.016021e-02 -0.11215911 0.44920249
## 2c3_q_171_S115_R1_001 -0.034510354 1.367030e+00 0.56353121 -0.41044940
## 2c4_q_172_d8_S116_R1_001 -0.095251138 1.365371e-01 -0.02622658 0.34165606
## 2c5_q_173_S117_R1_001 -0.035500637 8.756770e-03 -0.09356680 0.61272865
## 2c7_q_178_S119_R1_001 -0.016560151 5.200298e-01 -0.24421700 -0.65648135
## 2d2_q_181_S122_R1_001 -0.149532783 1.434138e-01 -0.43031295 0.35164777
## 2d7_q_189_S127_R1_001 -0.096918263 2.050379e-01 1.08077443 0.53148342
## 2e6_q_200_d8_S134_R1_001 -0.134266348 -3.278021e-01 -0.20291925 0.12694939
## 2f6_q_212_S142_R1_001 -0.020883054 8.998193e-01 -0.48309896 0.10393443
## 2g1_q_217_S145_R1_001 0.000452660 1.231264e+00 0.04999153 -0.04108279
## 2g4_q_221_S148_R1_001 -0.010577161 8.197523e-01 2.38328414 0.09548876
## 2g8_q_229_S152_R1_001 -0.160633270 -2.690822e-01 0.04643640 -0.11938996
## 2h1_q_230_S153_R1_001 0.089167007 2.501200e+00 -0.28724704 -0.50284244
## 2h4_q_235_S156_R1_001 -0.139950424 -3.439501e-01 0.06441462 0.29613410
## 2h7_q_238_S159_R1_001 -0.092584396 -2.518427e-02 0.06912832 0.06841295
## 2i1_q_242_S161_R1_001 -0.111334432 4.316169e-01 0.16525850 -0.98617977
## 2i4_q_246_S164_R1_001 -0.049676881 4.471470e-01 -0.26878097 0.05834035
## 2i5_q_247_S165_R1_001 -0.161766417 -7.010678e-02 -0.63663703 -0.65036747
## 2i6_q_247_d8_S166_R1_001 -0.131000151 -1.220562e-01 -0.40779877 0.02149601
## 2j1_q_250_S169_R1_001 -0.020841218 7.905999e-01 -0.21862400 1.03404855
## 2j4_q_254_S172_R1_001 -0.065964790 6.212782e-01 0.23359327 -0.95166977
## 2j6_q_257_S174_R1_001 -0.096266234 3.150204e-01 -0.37057896 0.55344924
## 2j7_q_259_d8_S175_R1_001 -0.034791105 2.682403e-01 -0.19729897 0.78426642
## 2k4_q_267_S180_R1_001 -0.056755778 3.372559e-01 0.20441459 0.52875164
## 2l4_q_282_S188_R1_001 0.046801924 1.049270e+00 -0.32111711 0.55330024
## 3a1_q_289_S193_R1_001 -0.016283330 7.839718e-01 3.42570105 0.63719837
## 3a4_q_294_S196_R1_001 -0.055028889 5.579441e-01 -0.23569255 -0.96901947
## 3b5_q_305_S205_R1_001 -0.080026792 5.550273e-01 -0.29669335 1.14746830
## 3c2_q_312_S210_R1_001 -0.096335989 4.966010e-01 0.48032752 -0.03636153
## 3c5_q_317_S213_R1_001 0.067465219 1.374733e+00 2.19094503 -0.82224510
## 3c7_q_322_S215_R1_001 0.060876243 1.269154e+00 0.92430894 0.18564491
## 3d2_q_325_S218_R1_001 -0.138484576 -6.384438e-02 -0.11976292 0.02673022
## 3d6_q_333_S222_R1_001 -0.104406092 -8.661356e-02 1.48500087 0.20381363
## 3e2_q_342_S226_R1_001 -0.095560903 6.994399e-01 -0.26723222 0.13659555
## 3e3_q_343_S227_R1_001 0.032227089 1.680994e+00 -0.35020467 0.15300660
## 3e5_q_348_S229_R1_001 -0.081650992 4.287000e-02 1.34357814 -0.17930351
## 3f2_q_353_d8_S234_R1_001 -0.064847877 3.886854e-01 -0.08032344 0.44572455
## 3f4_q_356_S236_R1_001 -0.150233467 1.266909e-01 -0.60459470 0.33045496
## 3f5_q_358_S237_R1_001 -0.123950084 3.009540e-02 -0.01938314 -0.05089865
## 3f7_q_360_S239_R1_001 -0.139060952 4.220418e-01 -0.12443001 -1.12864352
## 3f8_q_360_d8_S240_R1_001 -0.111862430 -1.239111e-01 -0.22884629 -0.30464463
## 3h1_q_372_S249_R1_001 -0.072104201 3.296868e-01 0.34024577 -0.03622522
## 3h2_q_374_S250_R1_001 -0.116032527 9.670143e-02 -0.31180552 0.50418150
## 3h3_q_375_d8_S251_R1_001 -0.140375672 -2.074910e-01 -0.39846030 0.62424176
## 3h5_q_377_S253_R1_001 0.018970396 1.318443e+00 0.22277241 0.02751369
## 3h8_q_387_d8_S256_R1_001 -0.014324360 3.825759e-01 -0.30853976 1.19531173
## 3i3_q_391_S259_R1_001 0.057304289 2.326290e+00 -0.35371172 -0.43305662
## 3i4_q_392_S260_R1_001 0.028287038 7.198527e-01 0.12634536 0.85456714
## 3j4_q_403_S268_R1_001 -0.139587775 1.941833e-05 -0.65101120 -0.58761576
## 3k1_q_411_S273_R1_001 0.076548669 9.836930e-01 -0.40196052 0.46398294
## 3k6_q_417_S278_R1_001 -0.130312190 -1.160545e-01 -0.17440373 0.01745052
## 3k7_q_419_S279_R1_001 -0.106135258 2.449997e-01 -0.31526122 -1.26394061
## 3k8_q_420_S280_R1_001 -0.073679357 -1.515619e-01 0.30335566 0.23833979
## 3l1_q_421_S281_R1_001 -0.087011670 3.735254e-01 0.48761750 0.65817262
## 3l4_q_426_S284_R1_001 -0.141674050 3.414606e-01 -0.66480069 -0.82335483
## 3l6_q_428_S286_R1_001 -0.106064562 -2.877813e-01 -0.07293916 -0.35535915
## 3l8_q_432_S288_R1_001 -0.128205289 -1.151206e-01 -0.21836920 0.15015978
## 13 14 15
## 1a1_q_002_S1_R1_001 -0.618056948 -0.317859857 -0.51652672
## 1a3_q_004_S3_R1_001 0.061626282 -0.315313752 -0.67408007
## 1a7_q_007_d8_S7_R1_001 0.143372033 0.066701371 -0.05687140
## 1a8_q_008_d8_S8_R1_001 -0.161627150 -0.378009042 -0.65386741
## 1c6_q_028_S22_R1_001 -0.168973900 -0.341682014 -0.49404419
## 1d3_q_038_S27_R1_001 -0.263670768 0.002871791 -0.43285776
## 1d5_q_042_S29_R1_001 0.047844090 -0.210012529 -0.35591706
## 1e4_q_055_S36_R1_001 -0.081467264 -0.226135370 -0.28430667
## 1e5_q_056_S37_R1_001 2.058234646 0.791298137 0.27423758
## 1e6_q_057_S38_R1_001 1.204780543 0.164863565 -0.05210617
## 1e8_q_059_S40_R1_001 0.575539815 -0.152007939 -0.26483685
## 1f1_q_060_S41_R1_001 0.002132848 -0.330578777 -0.58185519
## 1f2_q_062_S42_R1_001 -0.619960180 -0.524693407 -0.99049212
## 1f3_q_065_S43_R1_001 -0.417111446 -0.317789717 -0.74568299
## 1f4_q_066_S44_R1_001 -0.019996496 0.573943917 -0.05858046
## 1f5_q_068_S45_R1_001 0.876268565 0.738361579 0.15867519
## 1g3_q_075_S51_R1_001 -0.472540766 -0.419812091 -0.91213395
## 1g7_q_082_S55_R1_001 0.238462433 0.035713957 -0.02035499
## 1h3_q_086_d8_S59_R1_001 -0.493979152 -0.330641528 -0.62490995
## 1h4_q_090_d8_S60_R1_001 -0.356539576 -0.251291104 -0.58832398
## 1i2_q_104_S66_R1_001 1.778294746 0.789069963 0.16460413
## 1i3_q_105_S67_R1_001 -0.517332376 -0.437824820 -0.73841387
## 1i7_q_110_S71_R1_001 -0.047185778 -0.394919454 -0.62227188
## 1j3_q_114_S75_R1_001 0.032600834 -0.317618183 -0.45804551
## 1j4_q_115_S76_R1_001 -0.799092586 -0.411244255 -0.81179802
## 1j6_q_117_S78_R1_001 -0.149304842 -0.394074424 -0.86516746
## 1j7_q_118_S79_R1_001 -0.170127437 -0.105106591 -0.54878279
## 1k2_q_121_S82_R1_001 0.725768695 -0.165199188 -0.21906889
## 1k4_q_127_S84_R1_001 -0.329831274 0.071400807 -0.20226734
## 1k5_q_128_S85_R1_001 -0.122775559 -0.244899019 -0.46317531
## 1k6_q_130_S86_R1_001 -0.741065646 -0.239531004 -0.64522512
## 1k8_q_135_S88_R1_001 -0.847930010 -0.531109183 -0.87106276
## 2a1_q_146_S97_R1_001 0.480166377 0.974063717 0.27681064
## 2a7_q_153_S103_R1_001 0.430094127 -0.131868939 -0.34328247
## 2b1_q_156_S105_R1_001 0.208616945 0.266985362 -0.17465895
## 2b2_q_158_S106_R1_001 -0.451346791 -0.184238273 -0.22039511
## 2b3_q_160_S107_R1_001 -0.615265372 -0.172698429 -0.68921737
## 2b7_q_165_S111_R1_001 0.342997505 -0.050640576 -0.37665305
## 2b8_q_167_S112_R1_001 -0.133013899 0.237574812 -0.23223730
## 2c1_q_168_S113_R1_001 -0.733247860 -0.343548571 -0.83934275
## 2c3_q_171_S115_R1_001 1.267037813 0.114552079 -0.02903776
## 2c4_q_172_d8_S116_R1_001 -0.667289382 -0.328728745 -0.90195783
## 2c5_q_173_S117_R1_001 -0.356786155 -0.115190660 -0.38269631
## 2c7_q_178_S119_R1_001 -0.460411927 -0.156163676 -0.78706864
## 2d2_q_181_S122_R1_001 -0.833896551 -0.311534038 -0.82254930
## 2d7_q_189_S127_R1_001 -0.046002912 -0.295189918 -0.55149890
## 2e6_q_200_d8_S134_R1_001 -0.756098631 -0.400403026 -0.74454360
## 2f6_q_212_S142_R1_001 -0.241216889 0.492485915 -0.24030585
## 2g1_q_217_S145_R1_001 0.812910112 0.593909770 0.05125145
## 2g4_q_221_S148_R1_001 1.291466350 0.005228630 -0.15643011
## 2g8_q_229_S152_R1_001 -0.766211856 -0.467235003 -0.85053595
## 2h1_q_230_S153_R1_001 0.562698016 0.876327332 0.12806370
## 2h4_q_235_S156_R1_001 -0.485992956 -0.436662428 -0.77300581
## 2h7_q_238_S159_R1_001 -0.369033053 -0.268076032 -0.37383992
## 2i1_q_242_S161_R1_001 0.038509046 -0.459760067 -0.77776313
## 2i4_q_246_S164_R1_001 -0.533630897 -0.229712539 -0.57016704
## 2i5_q_247_S165_R1_001 -0.799082075 -0.426051376 -0.86893494
## 2i6_q_247_d8_S166_R1_001 -0.687284120 -0.312494553 -0.63673485
## 2j1_q_250_S169_R1_001 0.089982056 0.291234820 -0.10870803
## 2j4_q_254_S172_R1_001 0.194270376 -0.420000714 -0.86858815
## 2j6_q_257_S174_R1_001 -0.657327055 -0.278145883 -0.85211410
## 2j7_q_259_d8_S175_R1_001 -0.106303604 0.152266749 -0.14496585
## 2k4_q_267_S180_R1_001 0.094417722 -0.018258118 -0.17030353
## 2l4_q_282_S188_R1_001 0.250933378 0.975855812 0.03085169
## 3a1_q_289_S193_R1_001 1.400470539 -0.067761685 -0.26256753
## 3a4_q_294_S196_R1_001 -0.239724605 -0.368452449 -0.86744076
## 3b5_q_305_S205_R1_001 -0.319437014 0.086747517 -0.38804262
## 3c2_q_312_S210_R1_001 0.012589829 -0.316039444 -0.46644314
## 3c5_q_317_S213_R1_001 1.226777048 0.011264463 -0.34394950
## 3c7_q_322_S215_R1_001 1.668555341 0.672577021 0.21954537
## 3d2_q_325_S218_R1_001 -0.666227694 -0.394499683 -0.81959434
## 3d6_q_333_S222_R1_001 -0.013669411 -0.358124352 -0.49915947
## 3e2_q_342_S226_R1_001 -0.045954700 -0.033187611 -0.49996014
## 3e3_q_343_S227_R1_001 0.485928482 0.555416493 -0.18016747
## 3e5_q_348_S229_R1_001 0.224930323 -0.293387936 -0.54351155
## 3f2_q_353_d8_S234_R1_001 -0.202576501 -0.228523424 -0.43499216
## 3f4_q_356_S236_R1_001 -0.827363518 -0.375055265 -0.78179055
## 3f5_q_358_S237_R1_001 -0.537655884 -0.395169232 -0.55933403
## 3f7_q_360_S239_R1_001 -0.191475754 -0.460926642 -0.90303976
## 3f8_q_360_d8_S240_R1_001 -0.825575218 -0.465522214 -0.85226167
## 3h1_q_372_S249_R1_001 -0.165114295 -0.129721856 -0.29416075
## 3h2_q_374_S250_R1_001 -0.893016236 -0.370498849 -0.85253780
## 3h3_q_375_d8_S251_R1_001 -0.902307327 -0.431184662 -0.99239465
## 3h5_q_377_S253_R1_001 1.256878568 0.486025322 0.11236429
## 3h8_q_387_d8_S256_R1_001 -0.228351211 0.448732680 -0.06301181
## 3i3_q_391_S259_R1_001 0.650287022 0.703084556 0.05047647
## 3i4_q_392_S260_R1_001 -0.016681100 0.161582518 -0.50008435
## 3j4_q_403_S268_R1_001 -0.944146291 -0.322606776 -0.74680498
## 3k1_q_411_S273_R1_001 0.215636521 1.123198209 0.14790358
## 3k6_q_417_S278_R1_001 -0.791323421 -0.449720527 -0.70275873
## 3k7_q_419_S279_R1_001 -0.478739079 -0.480384557 -0.83395146
## 3k8_q_420_S280_R1_001 -0.143910786 -0.266092041 -0.41753642
## 3l1_q_421_S281_R1_001 0.174104505 -0.104245722 -0.44243229
## 3l4_q_426_S284_R1_001 -0.824282026 -0.341787120 -0.75698953
## 3l6_q_428_S286_R1_001 -0.761535128 -0.309627067 -0.52866440
## 3l8_q_432_S288_R1_001 -0.715494321 -0.416918935 -0.58849734
library(glue)
# adding sample description to data
data.sample<-kClustcentroids.any.trtlive.15 %>% as_tibble(rownames="sample") %>%
gather(cluster,value,-1) %>%
inner_join(sample.description.timecourse,by="sample") %>%
inner_join(cluster.any.trtlive.15.num,by="cluster") %>%
mutate(cluster.n=glue('{cluster2} \n({n2})',
cluster2=cluster,
n2=n) )
## Warning: Column `sample` joining character vector and factor, coercing into
## character vector
# data.group for adding group cluster mean
data.group<-data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% group_by(group.cluster) %>% summarize(group.cluster.mean=mean(value)) %>% inner_join(
data.sample %>% unite("group.cluster", c("group","cluster"),remove=FALSE) %>% dplyr::select("group.cluster","sampling_day","soil_trt","cluster.n","cluster"),by="group.cluster") %>% dplyr::slice(rep(1:1440)[!duplicated(.$group.cluster)])
# plot
p15.any.trtlive<- ggplot(data.sample,aes(x=soil_trt,y=value, group=cluster, colour=as.factor(cluster))) +
geom_point() + geom_hline(yintercept=0,color="red") +
geom_line(data=data.group,aes(x=soil_trt,y=group.cluster.mean)) +
facet_grid(cluster.n~sampling_day) + theme(axis.text=element_text(angle=90),strip.text.y=element_text(angle=0))+
labs(title= "K-means clustering of two afternoon DEGs (live vs dead soil): fifteen clusters",color = "Cluster",y="scaled expression level")
p15.any.trtlive
ggsave(p15.any.trtlive,file="../output/Twoafternoon.any.trtlive.DEG.Kmean.15clusters.png",width=11,height=15)
# 8 Kmeans cluster
temp<-tibble(transcript_ID=cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID, cluster=kClusters.any.trtlive.8) %>% split(.$cluster) %>% map(function(x) {GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist=x$transcript_ID)})
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1462 GO:0015760 5.623992e-07 1.0000000 3
## 359 GO:0006355 1.096760e-06 0.9999997 27
## 1449 GO:0015714 1.949991e-06 1.0000000 3
## 2143 GO:0035436 3.151415e-06 1.0000000 3
## 1448 GO:0015713 6.685127e-06 1.0000000 3
## numInCat term ontology
## 1462 5 glucose-6-phosphate transport BP
## 359 2992 regulation of transcription, DNA-templated BP
## 1449 7 phosphoenolpyruvate transport BP
## 2143 8 triose phosphate transmembrane transport BP
## 1448 10 phosphoglycerate transmembrane transport BP
## over_represented_padjust
## 1462 0.002077812
## 359 0.002077812
## 1449 0.002462839
## 2143 0.002985178
## 1448 0.005065989
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120 3.933902e-14 1.0000000 10
## 2253 GO:0042742 8.640276e-12 1.0000000 30
## 1166 GO:0010112 1.217158e-11 1.0000000 8
## 890 GO:0009611 3.002585e-10 1.0000000 21
## 638 GO:0006952 2.886080e-09 1.0000000 34
## 2750 GO:0050832 8.711643e-08 1.0000000 18
## 557 GO:0006749 2.512131e-07 1.0000000 8
## 853 GO:0009407 3.046529e-07 1.0000000 7
## 1210 GO:0010200 1.093472e-06 0.9999998 13
## 899 GO:0009626 1.204013e-06 0.9999999 10
## 894 GO:0009617 2.637069e-06 0.9999996 12
## 900 GO:0009627 3.095308e-06 0.9999997 8
## 3438 GO:1900056 3.275946e-06 0.9999999 5
## 2237 GO:0042538 9.409826e-06 0.9999990 8
## 1031 GO:0009863 2.617001e-05 0.9999987 5
## 933 GO:0009684 2.626258e-05 0.9999993 4
## 619 GO:0006887 3.813169e-05 0.9999961 7
## 485 GO:0006569 4.543349e-05 0.9999995 3
## 2986 GO:0062034 6.221247e-05 0.9999992 3
## 2400 GO:0044419 6.855688e-05 0.9999991 3
## 898 GO:0009625 8.048379e-05 0.9999926 6
## 3708 GO:2000022 1.235004e-04 0.9999877 6
## 2034 GO:0034052 1.235119e-04 0.9999978 3
## 972 GO:0009751 2.045166e-04 0.9999533 11
## 477 GO:0006561 2.669627e-04 0.9999933 3
## 932 GO:0009682 2.729335e-04 0.9999850 4
## 185 GO:0002229 3.590825e-04 0.9999460 7
## 646 GO:0006979 3.684017e-04 0.9998966 13
## 1001 GO:0009816 3.687152e-04 0.9999547 6
## 32 GO:0000162 3.720090e-04 0.9999777 4
## 2230 GO:0042372 4.057400e-04 0.9999876 3
## numInCat term
## 1174 27 camalexin biosynthetic process
## 2253 726 defense response to bacterium
## 1166 22 regulation of systemic acquired resistance
## 890 419 response to wounding
## 638 1165 defense response
## 2750 469 defense response to fungus
## 557 85 glutathione metabolic process
## 853 67 toxin catabolic process
## 1210 286 response to chitin
## 899 140 plant-type hypersensitive response
## 894 241 response to bacterium
## 900 104 systemic acquired resistance
## 3438 26 negative regulation of leaf senescence
## 2237 132 hyperosmotic salinity response
## 1031 38 salicylic acid mediated signaling pathway
## 933 17 indoleacetic acid biosynthetic process
## 619 96 exocytosis
## 485 7 tryptophan catabolic process
## 2986 8 L-pipecolic acid biosynthetic process
## 2400 8 interspecies interaction between organisms
## 898 84 response to insect
## 3708 89 regulation of jasmonic acid mediated signaling pathway
## 2034 10 positive regulation of plant-type hypersensitive response
## 972 347 response to salicylic acid
## 477 13 proline biosynthetic process
## 932 33 induced systemic resistance
## 185 126 defense response to oomycetes
## 646 502 response to oxidative stress
## 1001 97 defense response to bacterium, incompatible interaction
## 32 35 tryptophan biosynthetic process
## 2230 18 phylloquinone biosynthetic process
## ontology over_represented_padjust
## 1174 BP 1.490555e-10
## 2253 BP 1.537270e-08
## 1166 BP 1.537270e-08
## 890 BP 2.844199e-07
## 638 BP 2.187072e-06
## 2750 BP 5.501403e-05
## 557 BP 1.359781e-04
## 853 BP 1.442912e-04
## 1210 BP 4.562004e-04
## 899 BP 4.562004e-04
## 894 BP 9.083503e-04
## 900 BP 9.548123e-04
## 3438 BP 9.548123e-04
## 2237 BP 2.546702e-03
## 1031 BP 6.219308e-03
## 933 BP 6.219308e-03
## 619 BP 8.498880e-03
## 485 BP 9.563750e-03
## 2986 BP 1.240648e-02
## 2400 BP 1.298810e-02
## 898 BP 1.452157e-02
## 3708 BP 2.034725e-02
## 2034 BP 2.034725e-02
## 972 BP 3.228805e-02
## 477 BP 3.977480e-02
## 932 BP 3.977480e-02
## 185 BP 4.698474e-02
## 646 BP 4.698474e-02
## 1001 BP 4.698474e-02
## 32 BP 4.698474e-02
## 2230 BP 4.959190e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200 1.343043e-10 1.0000000 22
## 3164 GO:0071732 1.889538e-10 1.0000000 11
## 3094 GO:0071281 1.613934e-09 1.0000000 12
## 359 GO:0006355 2.926398e-07 1.0000000 76
## 914 GO:0009644 3.813626e-07 1.0000000 11
## 3125 GO:0071456 4.645253e-07 1.0000000 7
## 3114 GO:0071369 2.507765e-06 0.9999998 8
## 638 GO:0006952 4.345368e-06 0.9999982 38
## 1166 GO:0010112 1.355092e-05 0.9999994 5
## 1039 GO:0009873 1.550994e-05 0.9999960 17
## 170 GO:0001944 2.120295e-05 0.9999985 6
## 853 GO:0009407 2.365588e-05 0.9999978 7
## 1919 GO:0032268 2.430717e-05 0.9999998 3
## 854 GO:0009408 3.627089e-05 0.9999909 15
## 3708 GO:2000022 3.991164e-05 0.9999950 8
## 2239 GO:0042542 8.277225e-05 0.9999865 9
## 1270 GO:0010286 8.336564e-05 0.9999883 8
## 646 GO:0006979 1.072674e-04 0.9999647 19
## 2795 GO:0051259 1.232705e-04 0.9999947 4
## 557 GO:0006749 1.465446e-04 0.9999812 7
## 2253 GO:0042742 2.068113e-04 0.9999181 24
## 3117 GO:0071398 2.323324e-04 0.9999950 3
## numInCat term ontology
## 1210 286 response to chitin BP
## 3164 52 cellular response to nitric oxide BP
## 3094 77 cellular response to iron ion BP
## 359 2992 regulation of transcription, DNA-templated BP
## 914 109 response to high light intensity BP
## 3125 35 cellular response to hypoxia BP
## 3114 62 cellular response to ethylene stimulus BP
## 638 1165 defense response BP
## 1166 22 regulation of systemic acquired resistance BP
## 1039 364 ethylene-activated signaling pathway BP
## 170 41 vasculature development BP
## 853 67 toxin catabolic process BP
## 1919 5 regulation of cellular protein metabolic process BP
## 854 305 response to heat BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## 2239 130 response to hydrogen peroxide BP
## 1270 99 heat acclimation BP
## 646 502 response to oxidative stress BP
## 2795 22 protein complex oligomerization BP
## 557 85 glutathione metabolic process BP
## 2253 726 defense response to bacterium BP
## 3117 9 cellular response to fatty acid BP
## over_represented_padjust
## 1210 3.579730e-07
## 3164 3.579730e-07
## 3094 2.038399e-06
## 359 2.772030e-04
## 914 2.889966e-04
## 3125 2.933477e-04
## 3114 1.357418e-03
## 638 2.058075e-03
## 1166 5.704937e-03
## 1039 5.876717e-03
## 170 7.084606e-03
## 853 7.084606e-03
## 1919 7.084606e-03
## 854 9.816458e-03
## 3708 1.008168e-02
## 2239 1.858073e-02
## 1270 1.858073e-02
## 646 2.257979e-02
## 2795 2.458274e-02
## 557 2.776287e-02
## 2253 3.731466e-02
## 3117 4.001398e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1001 GO:0009816 1.135827e-05 0.9999990 7
## 890 GO:0009611 1.193363e-05 0.9999976 13
## 3251 GO:0080142 2.049805e-05 0.9999995 4
## 1205 GO:0010193 3.256854e-05 0.9999982 5
## 187 GO:0002237 5.567136e-05 0.9999952 6
## 638 GO:0006952 6.623245e-05 0.9999771 22
## 1210 GO:0010200 8.592369e-05 0.9999857 9
## 973 GO:0009753 1.046543e-04 0.9999800 10
## numInCat term ontology
## 1001 97 defense response to bacterium, incompatible interaction BP
## 890 419 response to wounding BP
## 3251 19 regulation of salicylic acid biosynthetic process BP
## 1205 54 response to ozone BP
## 187 88 response to molecule of bacterial origin BP
## 638 1165 defense response BP
## 1210 286 response to chitin BP
## 973 338 response to jasmonic acid BP
## over_represented_padjust
## 1001 0.02260826
## 890 0.02260826
## 3251 0.02588904
## 1205 0.03085055
## 187 0.04182579
## 638 0.04182579
## 1210 0.04650927
## 973 0.04956691
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742 8.757706e-16 1.0000000 30
## 3708 GO:2000022 7.994984e-11 1.0000000 10
## 423 GO:0006468 5.373869e-10 1.0000000 37
## 2750 GO:0050832 2.165212e-09 1.0000000 17
## 638 GO:0006952 8.977942e-09 1.0000000 28
## 1857 GO:0031347 6.083376e-08 1.0000000 8
## 26 GO:0000103 2.373158e-07 1.0000000 5
## 890 GO:0009611 7.356536e-07 0.9999999 14
## 1210 GO:0010200 3.530396e-06 0.9999996 10
## 1174 GO:0010120 2.304231e-05 0.9999993 4
## 642 GO:0006970 3.054236e-05 0.9999956 9
## 3009 GO:0070417 3.406753e-05 0.9999981 5
## 973 GO:0009753 3.498600e-05 0.9999942 10
## 1183 GO:0010150 8.085242e-05 0.9999885 8
## 1858 GO:0031348 9.938846e-05 0.9999931 5
## 3442 GO:1900067 1.076857e-04 0.9999998 2
## 2223 GO:0042344 1.092568e-04 0.9999979 3
## 972 GO:0009751 1.274416e-04 0.9999777 9
## 2917 GO:0055114 1.933095e-04 0.9999217 26
## numInCat term ontology
## 2253 726 defense response to bacterium BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## 423 1484 protein phosphorylation BP
## 2750 469 defense response to fungus BP
## 638 1165 defense response BP
## 1857 95 regulation of defense response BP
## 26 24 sulfate assimilation BP
## 890 419 response to wounding BP
## 1210 286 response to chitin BP
## 1174 27 camalexin biosynthetic process BP
## 642 252 response to osmotic stress BP
## 3009 54 cellular response to cold BP
## 973 338 response to jasmonic acid BP
## 1183 219 leaf senescence BP
## 1858 64 negative regulation of defense response BP
## 3442 3 regulation of cellular response to alkaline pH BP
## 2223 15 indole glucosinolate catabolic process BP
## 972 347 response to salicylic acid BP
## 2917 1923 oxidation-reduction process BP
## over_represented_padjust
## 2253 3.318295e-12
## 3708 1.514650e-07
## 423 6.787196e-07
## 2750 2.050997e-06
## 638 6.803485e-06
## 1857 3.841652e-05
## 26 1.284556e-04
## 890 3.484239e-04
## 1210 1.486297e-03
## 1174 8.730730e-03
## 642 1.019707e-02
## 3009 1.019707e-02
## 973 1.019707e-02
## 1183 2.188213e-02
## 1858 2.435142e-02
## 3442 2.435142e-02
## 2223 2.435142e-02
## 972 2.682646e-02
## 2917 3.854999e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345 5.357720e-16 1.0000000 10
## 1181 GO:0010143 9.648414e-08 1.0000000 5
## 514 GO:0006629 1.779627e-07 1.0000000 9
## 2917 GO:0055114 3.330532e-07 0.9999999 23
## 515 GO:0006631 1.472111e-05 0.9999993 5
## 953 GO:0009725 1.731199e-05 0.9999998 3
## 1531 GO:0016042 2.458469e-05 0.9999976 7
## 2136 GO:0035336 4.497929e-05 0.9999994 3
## numInCat term ontology
## 1291 41 suberin biosynthetic process BP
## 1181 29 cutin biosynthetic process BP
## 514 220 lipid metabolic process BP
## 2917 1923 oxidation-reduction process BP
## 515 85 fatty acid metabolic process BP
## 953 14 response to hormone BP
## 1531 229 lipid catabolic process BP
## 2136 17 long-chain fatty-acyl-CoA metabolic process BP
## over_represented_padjust
## 1291 2.030040e-12
## 1181 1.827892e-04
## 514 2.247668e-04
## 2917 3.154847e-04
## 515 1.093252e-02
## 953 1.093252e-02
## 1531 1.330734e-02
## 2136 2.130332e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1695 GO:0019441 1.046761e-06 1.0000000 3
## 646 GO:0006979 1.940648e-06 0.9999998 10
## 1292 GO:0010350 3.796980e-05 0.9999999 2
## 3096 GO:0071286 3.796980e-05 0.9999999 2
## 3104 GO:0071325 3.796980e-05 0.9999999 2
## 3197 GO:0072709 3.796980e-05 0.9999999 2
## 1838 GO:0031115 6.036162e-05 0.9999999 2
## 736 GO:0007568 8.378585e-05 0.9999965 4
## 1572 GO:0016310 9.069704e-05 0.9999832 10
## 2844 GO:0051592 9.983663e-05 0.9999997 2
## 1840 GO:0031117 1.039244e-04 0.9999996 2
## 3201 GO:0075733 1.069764e-04 0.9999996 2
## 3093 GO:0071280 1.177608e-04 0.9999995 2
## 2162 GO:0035865 1.203759e-04 0.9999995 2
## 1619 GO:0018008 1.219134e-04 0.9999995 2
## 1248 GO:0010248 1.574567e-04 0.9999993 2
## 452 GO:0006520 2.210690e-04 0.9999881 4
## numInCat
## 1695 7
## 646 502
## 1292 4
## 3096 4
## 3104 4
## 3197 4
## 1838 5
## 736 85
## 1572 685
## 2844 6
## 1840 7
## 3201 7
## 3093 7
## 2162 7
## 1619 6
## 1248 6
## 452 90
## term
## 1695 tryptophan catabolic process to kynurenine
## 646 response to oxidative stress
## 1292 cellular response to magnesium starvation
## 3096 cellular response to magnesium ion
## 3104 cellular response to mannitol stimulus
## 3197 cellular response to sorbitol
## 1838 negative regulation of microtubule polymerization
## 736 aging
## 1572 phosphorylation
## 2844 response to calcium ion
## 1840 positive regulation of microtubule depolymerization
## 3201 intracellular transport of virus
## 3093 cellular response to copper ion
## 2162 cellular response to potassium ion
## 1619 N-terminal peptidyl-glycine N-myristoylation
## 1248 establishment or maintenance of transmembrane electrochemical gradient
## 452 cellular amino acid metabolic process
## ontology over_represented_padjust
## 1695 BP 0.003676557
## 646 BP 0.003676557
## 1292 BP 0.023977928
## 3096 BP 0.023977928
## 3104 BP 0.023977928
## 3197 BP 0.023977928
## 1838 BP 0.030795317
## 736 BP 0.030795317
## 1572 BP 0.030795317
## 2844 BP 0.030795317
## 1840 BP 0.030795317
## 3201 BP 0.030795317
## 3093 BP 0.030795317
## 2162 BP 0.030795317
## 1619 BP 0.030795317
## 1248 BP 0.037287711
## 452 BP 0.049272379
# convert list to data.frame
temp %>% enframe(name="cluster") %>% unnest(value) %>% write_csv(path="../output/twoafternoon.any.trtsoil.DEG.Kmeans.8cluster.csv")
# 15 Kmeans cluster
temp<-tibble(transcript_ID=cpm.timecourse.v3.0.scale.twoafternoon.any.trtlive.DEG.spread$transcript_ID, cluster=kClusters.any.trtlive.15) %>% split(.$cluster) %>% map(function(x) {GOseq.Brgo.v3.0.Atgoslim.BP.list.ORA(genelist=x$transcript_ID)})
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 3114 GO:0071369 0.000000e+00 1.0000000 8
## 3164 GO:0071732 0.000000e+00 1.0000000 8
## 3094 GO:0071281 5.887219e-11 1.0000000 8
## 2907 GO:0055072 1.403391e-06 1.0000000 5
## 359 GO:0006355 9.616408e-06 0.9999972 22
## 1195 GO:0010167 1.116999e-05 0.9999997 4
## 613 GO:0006880 1.510212e-05 0.9999998 3
## 1760 GO:0030001 2.151165e-05 0.9999989 5
## 1764 GO:0030026 2.323359e-05 0.9999997 3
## 254 GO:0006096 5.543201e-05 0.9999966 5
## 612 GO:0006879 6.469221e-05 0.9999989 3
## 1114 GO:0010039 7.167439e-05 0.9999987 3
## 1952 GO:0032869 7.691956e-05 0.9999986 3
## 1462 GO:0015760 9.484983e-05 0.9999997 2
## 2487 GO:0045893 1.310402e-04 0.9999802 8
## 1798 GO:0030418 1.891972e-04 0.9999990 2
## 1449 GO:0015714 1.979116e-04 0.9999990 2
## 1438 GO:0015689 2.211728e-04 0.9999988 2
## numInCat term ontology
## 3114 62 cellular response to ethylene stimulus BP
## 3164 52 cellular response to nitric oxide BP
## 3094 77 cellular response to iron ion BP
## 2907 69 iron ion homeostasis BP
## 359 2992 regulation of transcription, DNA-templated BP
## 1195 53 response to nitrate BP
## 613 20 intracellular sequestering of iron ion BP
## 1760 130 metal ion transport BP
## 1764 21 cellular manganese ion homeostasis BP
## 254 138 glycolytic process BP
## 612 29 cellular iron ion homeostasis BP
## 1114 33 response to iron ion BP
## 1952 30 cellular response to insulin stimulus BP
## 1462 5 glucose-6-phosphate transport BP
## 2487 570 positive regulation of transcription, DNA-templated BP
## 1798 8 nicotianamine biosynthetic process BP
## 1449 7 phosphoenolpyruvate transport BP
## 1438 7 molybdate ion transport BP
## over_represented_padjust
## 3114 0.000000e+00
## 3164 0.000000e+00
## 3094 7.435558e-08
## 2907 1.329362e-03
## 359 7.053851e-03
## 1195 7.053851e-03
## 613 8.174561e-03
## 1760 9.781343e-03
## 1764 9.781343e-03
## 254 2.100319e-02
## 612 2.228352e-02
## 1114 2.241909e-02
## 1952 2.241909e-02
## 1462 2.567043e-02
## 2487 3.310074e-02
## 1798 4.411100e-02
## 1449 4.411100e-02
## 1438 4.655688e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1174 GO:0010120 1.723122e-11 1.0000000 7
## 2253 GO:0042742 6.152850e-08 1.0000000 16
## 2750 GO:0050832 1.527669e-06 0.9999998 11
## 1031 GO:0009863 1.866709e-05 0.9999995 4
## 2230 GO:0042372 3.841027e-05 0.9999995 3
## 900 GO:0009627 5.003514e-05 0.9999970 5
## 894 GO:0009617 6.392564e-05 0.9999927 7
## 975 GO:0009759 6.961346e-05 0.9999988 3
## 855 GO:0009409 7.064222e-05 0.9999859 11
## 2790 GO:0051245 9.537243e-05 0.9999997 2
## 1166 GO:0010112 9.614152e-05 0.9999982 3
## 944 GO:0009697 1.070904e-04 0.9999979 3
## 1210 GO:0010200 1.072639e-04 0.9999867 7
## 3438 GO:1900056 1.520096e-04 0.9999966 3
## 2301 GO:0043069 1.719698e-04 0.9999959 3
## 1858 GO:0031348 1.811769e-04 0.9999908 4
## 557 GO:0006749 1.932567e-04 0.9999899 4
## numInCat term ontology
## 1174 27 camalexin biosynthetic process BP
## 2253 726 defense response to bacterium BP
## 2750 469 defense response to fungus BP
## 1031 38 salicylic acid mediated signaling pathway BP
## 2230 18 phylloquinone biosynthetic process BP
## 900 104 systemic acquired resistance BP
## 894 241 response to bacterium BP
## 975 19 indole glucosinolate biosynthetic process BP
## 855 696 response to cold BP
## 2790 4 negative regulation of cellular defense response BP
## 1166 22 regulation of systemic acquired resistance BP
## 944 21 salicylic acid biosynthetic process BP
## 1210 286 response to chitin BP
## 3438 26 negative regulation of leaf senescence BP
## 2301 30 negative regulation of programmed cell death BP
## 1858 64 negative regulation of defense response BP
## 557 85 glutathione metabolic process BP
## over_represented_padjust
## 1174 6.528911e-08
## 2253 1.165657e-04
## 2750 1.929446e-03
## 1031 1.768240e-02
## 2230 2.910731e-02
## 900 2.974037e-02
## 894 2.974037e-02
## 975 2.974037e-02
## 855 2.974037e-02
## 2790 3.126329e-02
## 1166 3.126329e-02
## 944 3.126329e-02
## 1210 3.126329e-02
## 3438 4.114032e-02
## 2301 4.290496e-02
## 1858 4.290496e-02
## 557 4.307350e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1001 GO:0009816 9.333650e-07 0.9999999 7
## 3708 GO:2000022 1.751096e-06 0.9999999 6
## 894 GO:0009617 1.952047e-06 0.9999998 9
## 1857 GO:0031347 5.230519e-05 0.9999968 5
## numInCat term ontology
## 1001 97 defense response to bacterium, incompatible interaction BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## 894 241 response to bacterium BP
## 1857 95 regulation of defense response BP
## over_represented_padjust
## 1001 0.002465435
## 3708 0.002465435
## 894 0.002465435
## 1857 0.049546090
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1291 GO:0010345 0.000000e+00 1.0000000 9
## 1181 GO:0010143 1.631724e-08 1.0000000 5
## 2917 GO:0055114 2.704739e-07 0.9999999 19
## 514 GO:0006629 2.154168e-06 0.9999999 7
## 515 GO:0006631 2.635121e-06 0.9999999 5
## 1531 GO:0016042 3.108001e-05 0.9999976 6
## 1527 GO:0016024 6.205455e-05 0.9999990 3
## numInCat term ontology
## 1291 41 suberin biosynthetic process BP
## 1181 29 cutin biosynthetic process BP
## 2917 1923 oxidation-reduction process BP
## 514 220 lipid metabolic process BP
## 515 85 fatty acid metabolic process BP
## 1531 229 lipid catabolic process BP
## 1527 29 CDP-diacylglycerol biosynthetic process BP
## over_represented_padjust
## 1291 0.000000e+00
## 1181 3.091302e-05
## 2917 3.416085e-04
## 514 1.996895e-03
## 515 1.996895e-03
## 1531 1.962703e-02
## 1527 3.358924e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1530 GO:0016036 3.257160e-06 0.9999999 5
## 3219 GO:0080040 1.654299e-05 1.0000000 2
## numInCat term
## 1530 148 cellular response to phosphate starvation
## 3219 5 positive regulation of cellular response to phosphate starvation
## ontology over_represented_padjust
## 1530 BP 0.01234138
## 3219 BP 0.03134070
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 2253 GO:0042742 2.034123e-17 1.0000000 24
## 423 GO:0006468 1.807727e-12 1.0000000 29
## 1210 GO:0010200 7.269714e-08 1.0000000 9
## 972 GO:0009751 4.471802e-07 1.0000000 9
## 2750 GO:0050832 1.489589e-06 0.9999998 10
## 185 GO:0002229 1.667994e-05 0.9999988 6
## 638 GO:0006952 7.042662e-05 0.9999827 14
## 1174 GO:0010120 7.723694e-05 0.9999986 3
## 1858 GO:0031348 9.506968e-05 0.9999959 4
## 3708 GO:2000022 1.129933e-04 0.9999949 4
## numInCat term ontology
## 2253 726 defense response to bacterium BP
## 423 1484 protein phosphorylation BP
## 1210 286 response to chitin BP
## 972 347 response to salicylic acid BP
## 2750 469 defense response to fungus BP
## 185 126 defense response to oomycetes BP
## 638 1165 defense response BP
## 1174 27 camalexin biosynthetic process BP
## 1858 64 negative regulation of defense response BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## over_represented_padjust
## 2253 7.707293e-14
## 423 3.424739e-09
## 1210 9.181649e-05
## 972 4.235915e-04
## 2750 1.128811e-03
## 185 1.053338e-02
## 638 3.658134e-02
## 1174 3.658134e-02
## 1858 4.002434e-02
## 3708 4.281315e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1737 GO:0019761 2.032233e-05 0.9999994 4
## 2997 GO:0070179 2.418116e-05 1.0000000 2
## numInCat term ontology
## 1737 69 glucosinolate biosynthetic process BP
## 2997 4 D-serine biosynthetic process BP
## over_represented_padjust
## 1737 0.0458112
## 2997 0.0458112
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1210 GO:0010200 4.498580e-11 1.0000000 19
## 914 GO:0009644 9.502988e-08 1.0000000 10
## 359 GO:0006355 1.299821e-07 1.0000000 58
## 3125 GO:0071456 8.872693e-07 1.0000000 6
## 3708 GO:2000022 2.292534e-06 0.9999998 8
## 2750 GO:0050832 3.085071e-06 0.9999993 17
## 638 GO:0006952 4.156627e-06 0.9999986 29
## 2239 GO:0042542 4.285154e-06 0.9999995 9
## 1919 GO:0032268 8.024532e-06 1.0000000 3
## 854 GO:0009408 8.921277e-06 0.9999983 13
## 58 GO:0000302 3.385541e-05 0.9999966 7
## 853 GO:0009407 3.481444e-05 0.9999973 6
## 2795 GO:0051259 3.977016e-05 0.9999987 4
## 170 GO:0001944 4.197116e-05 0.9999976 5
## 3465 GO:1900457 9.998220e-05 0.9999983 3
## 557 GO:0006749 1.469226e-04 0.9999849 6
## numInCat term ontology
## 1210 286 response to chitin BP
## 914 109 response to high light intensity BP
## 359 2992 regulation of transcription, DNA-templated BP
## 3125 35 cellular response to hypoxia BP
## 3708 89 regulation of jasmonic acid mediated signaling pathway BP
## 2750 469 defense response to fungus BP
## 638 1165 defense response BP
## 2239 130 response to hydrogen peroxide BP
## 1919 5 regulation of cellular protein metabolic process BP
## 854 305 response to heat BP
## 58 96 response to reactive oxygen species BP
## 853 67 toxin catabolic process BP
## 2795 22 protein complex oligomerization BP
## 170 41 vasculature development BP
## 3465 10 regulation of brassinosteroid mediated signaling pathway BP
## 557 85 glutathione metabolic process BP
## over_represented_padjust
## 1210 1.704512e-07
## 914 1.641674e-04
## 359 1.641674e-04
## 3125 8.404659e-04
## 3708 1.737282e-03
## 2750 1.948222e-03
## 638 2.029556e-03
## 2239 2.029556e-03
## 1919 3.378328e-03
## 854 3.380272e-03
## 58 1.099266e-02
## 853 1.099266e-02
## 2795 1.135919e-02
## 170 1.135919e-02
## 3465 2.525550e-02
## 557 3.479310e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 26 GO:0000103 9.838045e-10 1 5
## numInCat term ontology over_represented_padjust
## 26 24 sulfate assimilation BP 3.727635e-06
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1695 GO:0019441 9.629362e-08 1.0000000 3
## 1292 GO:0010350 9.046552e-06 1.0000000 2
## 3096 GO:0071286 9.046552e-06 1.0000000 2
## 3104 GO:0071325 9.046552e-06 1.0000000 2
## 3197 GO:0072709 9.046552e-06 1.0000000 2
## 1838 GO:0031115 1.460646e-05 1.0000000 2
## 859 GO:0009414 1.567843e-05 0.9999986 7
## 2844 GO:0051592 2.317204e-05 1.0000000 2
## 1619 GO:0018008 2.655419e-05 1.0000000 2
## 1840 GO:0031117 2.684154e-05 1.0000000 2
## 3201 GO:0075733 2.734060e-05 0.9999999 2
## 1248 GO:0010248 2.913482e-05 0.9999999 2
## 3093 GO:0071280 2.918774e-05 0.9999999 2
## 2162 GO:0035865 2.961950e-05 0.9999999 2
## 967 GO:0009744 3.347001e-05 0.9999989 4
## 646 GO:0006979 5.215913e-05 0.9999956 6
## 2829 GO:0051511 6.257254e-05 0.9999998 2
## 1282 GO:0010325 1.318124e-04 0.9999994 2
## 736 GO:0007568 1.789968e-04 0.9999956 3
## 3081 GO:0071219 1.826884e-04 0.9999990 2
## 441 GO:0006499 2.422844e-04 0.9999934 3
## numInCat
## 1695 7
## 1292 4
## 3096 4
## 3104 4
## 3197 4
## 1838 5
## 859 596
## 2844 6
## 1619 6
## 1840 7
## 3201 7
## 1248 6
## 3093 7
## 2162 7
## 967 136
## 646 502
## 2829 9
## 1282 11
## 736 85
## 3081 14
## 441 89
## term
## 1695 tryptophan catabolic process to kynurenine
## 1292 cellular response to magnesium starvation
## 3096 cellular response to magnesium ion
## 3104 cellular response to mannitol stimulus
## 3197 cellular response to sorbitol
## 1838 negative regulation of microtubule polymerization
## 859 response to water deprivation
## 2844 response to calcium ion
## 1619 N-terminal peptidyl-glycine N-myristoylation
## 1840 positive regulation of microtubule depolymerization
## 3201 intracellular transport of virus
## 1248 establishment or maintenance of transmembrane electrochemical gradient
## 3093 cellular response to copper ion
## 2162 cellular response to potassium ion
## 967 response to sucrose
## 646 response to oxidative stress
## 2829 negative regulation of unidimensional cell growth
## 1282 raffinose family oligosaccharide biosynthetic process
## 736 aging
## 3081 cellular response to molecule of bacterial origin
## 441 N-terminal protein myristoylation
## ontology over_represented_padjust
## 1695 BP 0.0003648565
## 1292 BP 0.0068554769
## 3096 BP 0.0068554769
## 3104 BP 0.0068554769
## 3197 BP 0.0068554769
## 1838 BP 0.0080163051
## 859 BP 0.0080163051
## 2844 BP 0.0080163051
## 1619 BP 0.0080163051
## 1840 BP 0.0080163051
## 3201 BP 0.0080163051
## 1248 BP 0.0080163051
## 3093 BP 0.0080163051
## 2162 BP 0.0080163051
## 967 BP 0.0084545246
## 646 BP 0.0123519337
## 2829 BP 0.0139463151
## 1282 BP 0.0277465042
## 736 BP 0.0346103103
## 3081 BP 0.0346103103
## 441 BP 0.0437150254
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 853 GO:0009407 7.21299e-06 0.9999998 4
## numInCat term ontology over_represented_padjust
## 853 67 toxin catabolic process BP 0.02733002
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1572 GO:0016310 4.546280e-06 0.9999993 12
## 2750 GO:0050832 9.987204e-06 0.9999988 9
## 423 GO:0006468 1.451836e-05 0.9999962 18
## numInCat term ontology over_represented_padjust
## 1572 685 phosphorylation BP 0.01722585
## 2750 469 defense response to fungus BP 0.01833669
## 423 1484 protein phosphorylation BP 0.01833669
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 1166 GO:0010112 5.131427e-11 1.0000000 7
## 638 GO:0006952 1.205807e-09 1.0000000 28
## 890 GO:0009611 4.343094e-07 0.9999999 14
## 899 GO:0009626 4.232341e-06 0.9999996 8
## 973 GO:0009753 7.729912e-06 0.9999988 11
## 2253 GO:0042742 1.663149e-05 0.9999959 16
## 1205 GO:0010193 1.870619e-05 0.9999991 5
## 2237 GO:0042538 9.358914e-05 0.9999910 6
## numInCat term ontology
## 1166 22 regulation of systemic acquired resistance BP
## 638 1165 defense response BP
## 890 419 response to wounding BP
## 899 140 plant-type hypersensitive response BP
## 973 338 response to jasmonic acid BP
## 2253 726 defense response to bacterium BP
## 1205 54 response to ozone BP
## 2237 132 hyperosmotic salinity response BP
## over_represented_padjust
## 1166 1.944298e-07
## 638 2.284402e-06
## 890 5.485327e-04
## 899 4.009085e-03
## 973 5.857727e-03
## 2253 1.012539e-02
## 1205 1.012539e-02
## 2237 4.432616e-02
## Warning in pcls(G): initial point very close to some inequality constraints
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
## [1] "enriched.GO is"
## category over_represented_pvalue under_represented_pvalue numDEInCat
## 946 GO:0009699 5.793939e-06 0.9999999 4
## 890 GO:0009611 6.268525e-06 0.9999993 9
## 602 GO:0006855 7.426144e-06 0.9999996 6
## numInCat term ontology
## 946 34 phenylpropanoid biosynthetic process BP
## 890 419 response to wounding BP
## 602 130 drug transmembrane transport BP
## over_represented_padjust
## 946 0.00937922
## 890 0.00937922
## 602 0.00937922
# convert list to data.frame
temp %>% enframe(name="cluster") %>% unnest(value) %>% write_csv(path="../output/twoafternoon.any.trtsoil.DEG.Kmeans.15cluster.csv")